1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
4 #include <net/netfilter/nf_conntrack.h>
5 #include <net/netfilter/nf_conntrack_core.h>
6 #include <net/netfilter/nf_conntrack_zones.h>
7 #include <net/netfilter/nf_conntrack_labels.h>
8 #include <net/netfilter/nf_conntrack_helper.h>
9 #include <net/netfilter/nf_conntrack_acct.h>
10 #include <uapi/linux/tc_act/tc_pedit.h>
11 #include <net/tc_act/tc_ct.h>
12 #include <net/flow_offload.h>
13 #include <net/netfilter/nf_flow_table.h>
14 #include <linux/workqueue.h>
15 #include <linux/xarray.h>
17 #include "lib/fs_chains.h"
19 #include "en/mod_hdr.h"
20 #include "en/mapping.h"
/* NOTE(review): this is an elided listing — some original lines are missing
 * from every span below; comments describe only what the visible lines show.
 *
 * Field widths/masks for CT metadata carried in mlx5e TC registers; mlen is
 * in bytes in mlx5e_tc_attr_to_reg_mappings, hence the "* 8". */
25 #define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
26 #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
/* Conntrack state bits encoded into the ctstate register value. */
27 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
28 #define MLX5_CT_STATE_TRK_BIT BIT(2)
29 #define MLX5_CT_STATE_NAT_BIT BIT(3)
31 #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
32 #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
33 #define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX
35 #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
36 #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
/* Debug print helper; relies on a "ct_priv" variable in scope at call site. */
38 #define ct_dbg(fmt, args...)\
39 netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
/* Per-device CT offload state: flow tables (ct / ct_nat / post_ct), hash
 * tables keyed by cookie and by tuple, and the zone/labels mapping contexts.
 * NOTE(review): closing braces of these structs are elided in this listing. */
41 struct mlx5_tc_ct_priv {
42 struct mlx5_core_dev *dev;
43 const struct net_device *netdev;
44 struct mod_hdr_tbl *mod_hdr_tbl;
46 struct xarray tuple_ids;
47 struct rhashtable zone_ht;
48 struct rhashtable ct_tuples_ht;
49 struct rhashtable ct_tuples_nat_ht;
50 struct mlx5_flow_table *ct;
51 struct mlx5_flow_table *ct_nat;
52 struct mlx5_flow_table *post_ct;
53 struct mutex control_lock; /* guards parallel adds/dels */
54 struct mutex shared_counter_lock;
55 struct mapping_ctx *zone_mapping;
56 struct mapping_ctx *labels_mapping;
57 enum mlx5_flow_namespace_type ns_type;
58 struct mlx5_fs_chains *chains;
/* NOTE(review): the fields below belong to a different struct whose header
 * (presumably a per-flow CT context, e.g. mlx5_ct_flow) is elided — confirm. */
62 struct mlx5_flow_attr *pre_ct_attr;
63 struct mlx5_flow_attr *post_ct_attr;
64 struct mlx5_flow_handle *pre_ct_rule;
65 struct mlx5_flow_handle *post_ct_rule;
66 struct mlx5_ct_ft *ft;
/* One offloaded rule (plain or NAT direction) for a CT entry in a zone. */
71 struct mlx5_ct_zone_rule {
72 struct mlx5_flow_handle *rule;
73 struct mlx5e_mod_hdr_handle *mh;
74 struct mlx5_flow_attr *attr;
/* Per-zone pre-CT table: one "hit" rule/group and one miss rule/group. */
78 struct mlx5_tc_ct_pre {
79 struct mlx5_flow_table *ft;
80 struct mlx5_flow_group *flow_grp;
81 struct mlx5_flow_group *miss_grp;
82 struct mlx5_flow_handle *flow_rule;
83 struct mlx5_flow_handle *miss_rule;
84 struct mlx5_modify_hdr *modify_hdr;
/* NOTE(review): fields of struct mlx5_ct_ft (header elided) — keyed by zone
 * in zone_ht; owns the per-zone entries hashtable and pre-CT tables. */
88 struct rhash_head node;
92 struct nf_flowtable *nf_ft;
93 struct mlx5_tc_ct_priv *ct_priv;
94 struct rhashtable ct_entries_ht;
95 struct mlx5_tc_ct_pre pre_ct;
96 struct mlx5_tc_ct_pre pre_ct_nat;
/* 5-tuple key (most scalar fields elided from this listing). */
99 struct mlx5_ct_tuple {
106 struct in6_addr src_v6;
110 struct in6_addr dst_v6;
/* HW counter wrapper; refcount/is_shared fields are elided here. */
121 struct mlx5_ct_counter {
122 struct mlx5_fc *counter;
/* One offloaded conntrack entry: linked into three hashtables (by cookie,
 * by tuple, by NAT tuple) and carrying the two zone rules [0]=orig, [1]=nat. */
127 struct mlx5_ct_entry {
128 struct rhash_head node;
129 struct rhash_head tuple_node;
130 struct rhash_head tuple_nat_node;
131 struct mlx5_ct_counter *counter;
132 unsigned long cookie;
133 unsigned long restore_cookie;
134 struct mlx5_ct_tuple tuple;
135 struct mlx5_ct_tuple tuple_nat;
136 struct mlx5_ct_zone_rule zone_rules[2];
/* Hashtable parameter blocks. All key entries by the member used as key;
 * min_size 16K pre-sizes for many concurrent connections.
 * NOTE(review): the closing "};" of each table is elided in this listing. */
/* ft->ct_entries_ht: entries keyed by the flow_cls_offload cookie. */
139 static const struct rhashtable_params cts_ht_params = {
140 .head_offset = offsetof(struct mlx5_ct_entry, node),
141 .key_offset = offsetof(struct mlx5_ct_entry, cookie),
142 .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
143 .automatic_shrinking = true,
144 .min_size = 16 * 1024,
/* ct_priv->zone_ht: one mlx5_ct_ft per CT zone. */
147 static const struct rhashtable_params zone_params = {
148 .head_offset = offsetof(struct mlx5_ct_ft, node),
149 .key_offset = offsetof(struct mlx5_ct_ft, zone),
150 .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
151 .automatic_shrinking = true,
/* ct_priv->ct_tuples_ht: entries keyed by the original-direction tuple. */
154 static const struct rhashtable_params tuples_ht_params = {
155 .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
156 .key_offset = offsetof(struct mlx5_ct_entry, tuple),
157 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
158 .automatic_shrinking = true,
159 .min_size = 16 * 1024,
/* ct_priv->ct_tuples_nat_ht: entries keyed by the post-NAT tuple. */
162 static const struct rhashtable_params tuples_nat_ht_params = {
163 .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
164 .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
165 .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
166 .automatic_shrinking = true,
167 .min_size = 16 * 1024,
/* True iff the entry was inserted into ct_tuples_nat_ht: rhashtable insertion
 * links tuple_nat_node, so a non-NULL .next marks a NAT entry. */
171 mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
173 return !!(entry->tuple_nat_node.next);
/* Extract the original-direction 5-tuple from a flow_cls rule into *tuple:
 * ethertype/ip_proto/addr_type, then v4 or v6 addresses, then L4 ports.
 * NOTE(review): error/return paths and the switch's case labels for the
 * port extraction are elided from this listing. */
177 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
179 struct flow_match_control control;
180 struct flow_match_basic basic;
182 flow_rule_match_basic(rule, &basic);
183 flow_rule_match_control(rule, &control);
185 tuple->n_proto = basic.key->n_proto;
186 tuple->ip_proto = basic.key->ip_proto;
187 tuple->addr_type = control.key->addr_type;
189 if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
190 struct flow_match_ipv4_addrs match;
192 flow_rule_match_ipv4_addrs(rule, &match);
193 tuple->ip.src_v4 = match.key->src;
194 tuple->ip.dst_v4 = match.key->dst;
195 } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
196 struct flow_match_ipv6_addrs match;
198 flow_rule_match_ipv6_addrs(rule, &match);
199 tuple->ip.src_v6 = match.key->src;
200 tuple->ip.dst_v6 = match.key->dst;
/* Ports are only read when the dissector provides them. */
205 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
206 struct flow_match_ports match;
208 flow_rule_match_ports(rule, &match);
209 switch (tuple->ip_proto) {
/* presumably under case IPPROTO_TCP/IPPROTO_UDP — labels elided, confirm */
212 tuple->port.src = match.key->src;
213 tuple->port.dst = match.key->dst;
/* Apply the rule's MANGLE (NAT rewrite) actions on top of *tuple, producing
 * the post-NAT tuple: IPv4/IPv6 addresses and TCP/UDP ports are overwritten
 * from the mangle offset/value pairs. NOTE(review): loop variable decl,
 * default cases and returns are elided from this listing. */
226 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
227 struct flow_rule *rule)
229 struct flow_action *flow_action = &rule->action;
230 struct flow_action_entry *act;
231 u32 offset, val, ip6_offset;
234 flow_action_for_each(i, act, flow_action) {
235 if (act->id != FLOW_ACTION_MANGLE)
238 offset = act->mangle.offset;
239 val = act->mangle.val;
240 switch (act->mangle.htype) {
241 case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
242 if (offset == offsetof(struct iphdr, saddr))
243 tuple->ip.src_v4 = cpu_to_be32(val);
244 else if (offset == offsetof(struct iphdr, daddr))
245 tuple->ip.dst_v4 = cpu_to_be32(val);
250 case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
/* ip6_offset selects the 32-bit word: 0-3 => saddr, 4-7 => daddr.
 * NOTE(review): the division to word units and the "< 4" branch header
 * are elided here — confirm against the full source. */
251 ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
254 tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
255 else if (ip6_offset < 8)
256 tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
261 case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
262 if (offset == offsetof(struct tcphdr, source))
263 tuple->port.src = cpu_to_be16(val);
264 else if (offset == offsetof(struct tcphdr, dest))
265 tuple->port.dst = cpu_to_be16(val);
270 case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
271 if (offset == offsetof(struct udphdr, source))
272 tuple->port.src = cpu_to_be16(val);
273 else if (offset == offsetof(struct udphdr, dest))
274 tuple->port.dst = cpu_to_be16(val);
/* Translate a flow_cls rule's dissector keys into mlx5 match criteria/value
 * headers in *spec: ethertype + ip_proto, v4/v6 addresses, L4 ports and TCP
 * flags. NOTE(review): several brace/case lines are elided in this listing
 * (e.g. the ip_proto switch headers around the tcp/udp port MLX5_SETs). */
288 mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
289 struct flow_rule *rule)
291 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
293 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
298 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
299 struct flow_match_basic match;
301 flow_rule_match_basic(rule, &match);
303 mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
305 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
306 match.mask->ip_proto);
307 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
308 match.key->ip_proto);
310 ip_proto = match.key->ip_proto;
313 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
314 struct flow_match_control match;
316 flow_rule_match_control(rule, &match);
317 addr_type = match.key->addr_type;
/* IPv4 source/destination address match (mask then key for each). */
320 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
321 struct flow_match_ipv4_addrs match;
323 flow_rule_match_ipv4_addrs(rule, &match);
324 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
325 src_ipv4_src_ipv6.ipv4_layout.ipv4),
326 &match.mask->src, sizeof(match.mask->src));
327 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
328 src_ipv4_src_ipv6.ipv4_layout.ipv4),
329 &match.key->src, sizeof(match.key->src));
330 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
331 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
332 &match.mask->dst, sizeof(match.mask->dst));
333 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
334 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
335 &match.key->dst, sizeof(match.key->dst));
/* IPv6 source/destination address match. */
338 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
339 struct flow_match_ipv6_addrs match;
341 flow_rule_match_ipv6_addrs(rule, &match);
342 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
343 src_ipv4_src_ipv6.ipv6_layout.ipv6),
344 &match.mask->src, sizeof(match.mask->src));
345 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
346 src_ipv4_src_ipv6.ipv6_layout.ipv6),
347 &match.key->src, sizeof(match.key->src));
349 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
350 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
351 &match.mask->dst, sizeof(match.mask->dst));
352 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
353 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
354 &match.key->dst, sizeof(match.key->dst));
/* L4 port match; presumably switched on ip_proto (TCP vs UDP fields) —
 * the switch/case lines are elided, confirm against the full source. */
357 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
358 struct flow_match_ports match;
360 flow_rule_match_ports(rule, &match);
363 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
364 tcp_sport, ntohs(match.mask->src));
365 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
366 tcp_sport, ntohs(match.key->src));
368 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
369 tcp_dport, ntohs(match.mask->dst));
370 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
371 tcp_dport, ntohs(match.key->dst));
375 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
376 udp_sport, ntohs(match.mask->src));
377 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
378 udp_sport, ntohs(match.key->src));
380 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
381 udp_dport, ntohs(match.mask->dst));
382 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
383 udp_dport, ntohs(match.key->dst));
/* TCP flags match (network to host order for the 9-bit flags field). */
390 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
391 struct flow_match_tcp match;
393 flow_rule_match_tcp(rule, &match);
394 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
395 ntohs(match.mask->flags));
396 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
397 ntohs(match.key->flags));
/* Drop a reference on the entry's counter; a shared counter is only
 * destroyed when the last reference goes away, a private counter always. */
404 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
406 if (entry->counter->is_shared &&
407 !refcount_dec_and_test(&entry->counter->refcount))
/* presumably an early return here — line elided, confirm */
410 mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
411 kfree(entry->counter);
/* Tear down one direction (nat selects zone_rules[1]) of an offloaded entry:
 * delete the HW rule, detach its mod-hdr and release the labels mapping.
 * NOTE(review): the bool nat parameter line and attr kfree are elided. */
415 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
416 struct mlx5_ct_entry *entry,
419 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
420 struct mlx5_flow_attr *attr = zone_rule->attr;
422 ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
424 mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
425 mlx5e_mod_hdr_detach(ct_priv->dev,
426 ct_priv->mod_hdr_tbl, zone_rule->mh);
427 mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
/* Delete both the NAT and the original-direction rules of an entry. */
432 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
433 struct mlx5_ct_entry *entry)
435 mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
436 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
/* Return the rule's FLOW_ACTION_CT_METADATA action entry.
 * NOTE(review): the "return act" / "return NULL" lines are elided here. */
439 static struct flow_action_entry *
440 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
442 struct flow_action *flow_action = &flow_rule->action;
443 struct flow_action_entry *act;
446 flow_action_for_each(i, act, flow_action) {
447 if (act->id == FLOW_ACTION_CT_METADATA)
/* Program mod-hdr actions that write the CT metadata registers: ctstate,
 * mark (line elided), labels id and zone-restore id; NIC rx additionally
 * duplicates the zone-restore value because reg_c1 is not copied to reg_b.
 * NOTE(review): parameter list tail and error returns are elided. */
455 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
456 struct mlx5e_tc_mod_hdr_acts *mod_acts,
462 enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
463 struct mlx5_core_dev *dev = ct_priv->dev;
466 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
467 CTSTATE_TO_REG, ct_state);
/* presumably MARK_TO_REG is set here — the argument line is elided */
471 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
476 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
477 LABELS_TO_REG, labels_id);
481 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
482 ZONE_RESTORE_TO_REG, zone_restore_id);
486 /* Make another copy of zone id in reg_b for
487 * NIC rx flows since we don't copy reg_c1 to
490 if (ns != MLX5_FLOW_NAMESPACE_FDB) {
491 err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
492 NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
/* Convert one pedit/mangle action into a single mlx5 set_action_in modact:
 * pick the HW field from header type + offset; length 0 means the full
 * 32-bit field, 16 for the L4 port fields. The set value is act->mangle.val.
 * NOTE(review): the "else return -EOPNOTSUPP" style default branches and the
 * final return are elided from this listing. */
500 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
503 u32 offset = act->mangle.offset, field;
505 switch (act->mangle.htype) {
506 case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
507 MLX5_SET(set_action_in, modact, length, 0);
508 if (offset == offsetof(struct iphdr, saddr))
509 field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
510 else if (offset == offsetof(struct iphdr, daddr))
511 field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
516 case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
517 MLX5_SET(set_action_in, modact, length, 0);
/* IPv6 words map offset 12..0 onto the 31_0 .. 127_96 HW sub-fields. */
518 if (offset == offsetof(struct ipv6hdr, saddr) + 12)
519 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
520 else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
521 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
522 else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
523 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
524 else if (offset == offsetof(struct ipv6hdr, saddr))
525 field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
526 else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
527 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
528 else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
529 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
530 else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
531 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
532 else if (offset == offsetof(struct ipv6hdr, daddr))
533 field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
538 case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
539 MLX5_SET(set_action_in, modact, length, 16);
540 if (offset == offsetof(struct tcphdr, source))
541 field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
542 else if (offset == offsetof(struct tcphdr, dest))
543 field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
548 case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
549 MLX5_SET(set_action_in, modact, length, 16);
550 if (offset == offsetof(struct udphdr, source))
551 field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
552 else if (offset == offsetof(struct udphdr, dest))
553 field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
/* Fill the common parts of the modact once the field is resolved. */
562 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
563 MLX5_SET(set_action_in, modact, offset, 0);
564 MLX5_SET(set_action_in, modact, field, field);
565 MLX5_SET(set_action_in, modact, data, act->mangle.val);
/* Build the NAT rewrite part of the entry's mod-hdr: for every MANGLE action
 * grow mod_acts and append one parsed set-action. CT_METADATA was consumed
 * earlier. NOTE(review): the action switch header, default case and error
 * returns are elided from this listing. */
571 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
572 struct flow_rule *flow_rule,
573 struct mlx5e_tc_mod_hdr_acts *mod_acts)
575 struct flow_action *flow_action = &flow_rule->action;
576 struct mlx5_core_dev *mdev = ct_priv->dev;
577 struct flow_action_entry *act;
582 action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
584 flow_action_for_each(i, act, flow_action) {
586 case FLOW_ACTION_MANGLE: {
587 err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
/* modact points at the next free slot in the actions buffer. */
592 modact = mod_acts->actions +
593 mod_acts->num_actions * action_size;
595 err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
599 mod_acts->num_actions++;
603 case FLOW_ACTION_CT_METADATA:
604 /* Handled earlier */
/* Create and attach the mod-hdr for one entry rule: map the CT labels to an
 * id, add NAT rewrites when nat is set, write the metadata registers
 * (ctstate = TRK|ESTABLISHED [+NAT]), then attach through the mod-hdr table
 * and publish the handle via *mh. On failure the labels mapping is released.
 * NOTE(review): several error-path labels/returns are elided. */
615 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
616 struct mlx5_flow_attr *attr,
617 struct flow_rule *flow_rule,
618 struct mlx5e_mod_hdr_handle **mh,
619 u8 zone_restore_id, bool nat)
621 struct mlx5e_tc_mod_hdr_acts mod_acts = {};
622 struct flow_action_entry *meta;
626 meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
630 err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels,
631 &attr->ct_attr.ct_labels_id);
635 err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
640 ct_state |= MLX5_CT_STATE_NAT_BIT;
643 ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
644 err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
646 meta->ct_metadata.mark,
647 attr->ct_attr.ct_labels_id,
652 *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
653 ct_priv->mod_hdr_tbl,
660 attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
/* Success path frees the temporary actions buffer; failure path also
 * drops the labels mapping reference taken above. */
662 dealloc_mod_hdr_actions(&mod_acts);
666 dealloc_mod_hdr_actions(&mod_acts);
667 mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
/* Offload one direction of a CT entry: allocate spec + attr, build the
 * mod-hdr, set forward-to-post_ct with counter, match the tuple and zone
 * register, then insert the rule into ct or ct_nat. On rule-insert failure
 * the mod-hdr and labels mapping are rolled back.
 * NOTE(review): ENOMEM checks and some error labels are elided. */
672 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
673 struct flow_rule *flow_rule,
674 struct mlx5_ct_entry *entry,
675 bool nat, u8 zone_restore_id)
677 struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
678 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
679 struct mlx5_flow_spec *spec = NULL;
680 struct mlx5_flow_attr *attr;
683 zone_rule->nat = nat;
685 spec = kzalloc(sizeof(*spec), GFP_KERNEL);
689 attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
695 err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
697 zone_restore_id, nat);
699 ct_dbg("Failed to create ct entry mod hdr");
/* Forward to post_ct with modify-header and flow counter attached. */
703 attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
704 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
705 MLX5_FLOW_CONTEXT_ACTION_COUNT;
706 attr->dest_chain = 0;
707 attr->dest_ft = ct_priv->post_ct;
708 attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
709 attr->outer_match_level = MLX5_MATCH_L4;
710 attr->counter = entry->counter->counter;
711 attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
713 mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
714 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
715 entry->tuple.zone & MLX5_CT_ZONE_MASK,
718 zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
719 if (IS_ERR(zone_rule->rule)) {
720 err = PTR_ERR(zone_rule->rule);
721 ct_dbg("Failed to add ct entry rule, nat: %d", nat);
725 zone_rule->attr = attr;
728 ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
/* error unwind: detach mod-hdr and release the labels mapping */
733 mlx5e_mod_hdr_detach(ct_priv->dev,
734 ct_priv->mod_hdr_tbl, zone_rule->mh);
735 mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
/* Allocate a private (non-shared) aging HW flow counter for a CT entry.
 * Returns the wrapper or an ERR_PTR. NOTE(review): the error-path kfree and
 * final return are elided from this listing. */
743 static struct mlx5_ct_counter *
744 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
746 struct mlx5_ct_counter *counter;
749 counter = kzalloc(sizeof(*counter), GFP_KERNEL);
751 return ERR_PTR(-ENOMEM);
753 counter->is_shared = false;
754 counter->counter = mlx5_fc_create(ct_priv->dev, true);
755 if (IS_ERR(counter->counter)) {
756 ct_dbg("Failed to create counter for ct entry");
757 ret = PTR_ERR(counter->counter);
/* Share one counter between the two directions of a connection: build the
 * reversed tuple, look the reverse entry up in ct_tuples_ht under
 * shared_counter_lock and reuse its counter if a reference can be taken;
 * otherwise create a fresh counter marked shared with refcount 1.
 * NOTE(review): line 798 (between the lookup and the refcount_inc_not_zero)
 * is elided — confirm the full source NULL-checks rev_entry before
 * dereferencing rev_entry->counter, and that a shared-ness check exists. */
765 static struct mlx5_ct_counter *
766 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
767 struct mlx5_ct_entry *entry)
769 struct mlx5_ct_tuple rev_tuple = entry->tuple;
770 struct mlx5_ct_counter *shared_counter;
771 struct mlx5_ct_entry *rev_entry;
775 /* get the reversed tuple */
776 tmp_port = rev_tuple.port.src;
777 rev_tuple.port.src = rev_tuple.port.dst;
778 rev_tuple.port.dst = tmp_port;
780 if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
781 __be32 tmp_addr = rev_tuple.ip.src_v4;
783 rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
784 rev_tuple.ip.dst_v4 = tmp_addr;
785 } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
786 struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
788 rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
789 rev_tuple.ip.dst_v6 = tmp_addr;
/* unsupported addr_type */
791 return ERR_PTR(-EOPNOTSUPP);
794 /* Use the same counter as the reverse direction */
795 mutex_lock(&ct_priv->shared_counter_lock);
796 rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple,
799 if (refcount_inc_not_zero(&rev_entry->counter->refcount)) {
800 mutex_unlock(&ct_priv->shared_counter_lock);
801 return rev_entry->counter;
804 mutex_unlock(&ct_priv->shared_counter_lock);
806 shared_counter = mlx5_tc_ct_counter_create(ct_priv);
807 if (IS_ERR(shared_counter)) {
808 ret = PTR_ERR(shared_counter);
812 shared_counter->is_shared = true;
813 refcount_set(&shared_counter->refcount, 1);
814 return shared_counter;
/* Offload both directions of an entry: pick a private counter when conntrack
 * accounting is on (per-direction stats required), otherwise a shared one;
 * then add the plain and NAT rules, unwinding on failure.
 * NOTE(review): zone_restore_id parameter line and goto labels are elided. */
818 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
819 struct flow_rule *flow_rule,
820 struct mlx5_ct_entry *entry,
825 if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
826 entry->counter = mlx5_tc_ct_counter_create(ct_priv);
828 entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
830 if (IS_ERR(entry->counter)) {
831 err = PTR_ERR(entry->counter);
835 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
840 err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
/* unwind: delete the orig-direction rule, then drop the counter ref */
848 mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
850 mlx5_tc_ct_counter_put(ct_priv, entry);
/* FLOW_CLS_REPLACE handler from the nf flowtable block: allocate an entry
 * for the cookie, derive original and NAT tuples, insert into the tuple
 * hashtables (NAT table only when NAT actually rewrote the tuple), offload
 * both rules, then index the entry by cookie. Failure unwinds in reverse.
 * NOTE(review): -EEXIST/-ENOMEM checks and goto labels are elided. */
855 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
856 struct flow_cls_offload *flow)
858 struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
859 struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
860 struct flow_action_entry *meta_action;
861 unsigned long cookie = flow->cookie;
862 struct mlx5_ct_entry *entry;
865 meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
/* duplicate-cookie lookup; presumably returns -EEXIST when found */
869 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
874 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
878 entry->tuple.zone = ft->zone;
879 entry->cookie = flow->cookie;
880 entry->restore_cookie = meta_action->ct_metadata.cookie;
882 err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
886 memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
887 err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
891 err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht,
/* only index the NAT tuple when it differs from the original tuple */
897 if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
898 err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht,
899 &entry->tuple_nat_node,
900 tuples_nat_ht_params);
905 err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
906 ft->zone_restore_id);
910 err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node,
/* error unwind path */
918 mlx5_tc_ct_entry_del_rules(ct_priv, entry);
920 if (mlx5_tc_ct_entry_has_nat(entry))
921 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
922 &entry->tuple_nat_node, tuples_nat_ht_params);
924 rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
930 netdev_warn(ct_priv->netdev,
931 "Failed to offload ct entry, err: %d\n", err);
/* Remove an entry's HW rules and tuple-hashtable links (under
 * shared_counter_lock so shared-counter lookups can't race), then drop the
 * counter reference. */
936 mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
937 struct mlx5_ct_entry *entry)
939 mlx5_tc_ct_entry_del_rules(ct_priv, entry);
940 mutex_lock(&ct_priv->shared_counter_lock);
941 if (mlx5_tc_ct_entry_has_nat(entry))
942 rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
943 &entry->tuple_nat_node,
944 tuples_nat_ht_params);
945 rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
947 mutex_unlock(&ct_priv->shared_counter_lock);
948 mlx5_tc_ct_counter_put(ct_priv, entry);
/* FLOW_CLS_DESTROY handler: find the entry by cookie, tear it down and
 * unlink it from the per-zone entries table; the double-remove WARN guards
 * against hashtable inconsistency. NOTE(review): not-found return and the
 * kfree of the entry are elided from this listing. */
953 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
954 struct flow_cls_offload *flow)
956 unsigned long cookie = flow->cookie;
957 struct mlx5_ct_entry *entry;
959 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
964 mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry);
965 WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht,
/* FLOW_CLS_STATS handler: report the cached HW counter values for the
 * entry matching the cookie back to conntrack via flow_stats_update.
 * NOTE(review): the not-found early return is elided. */
974 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
975 struct flow_cls_offload *f)
977 unsigned long cookie = f->cookie;
978 struct mlx5_ct_entry *entry;
979 u64 lastuse, packets, bytes;
981 entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
986 mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
987 flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
988 FLOW_ACTION_HW_STATS_DELAYED);
/* Block callback registered with the nf flowtable: dispatch clsflower
 * REPLACE/DESTROY/STATS commands to the handlers above.
 * NOTE(review): the default case / -EOPNOTSUPP returns are elided. */
994 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
997 struct flow_cls_offload *f = type_data;
998 struct mlx5_ct_ft *ft = cb_priv;
1000 if (type != TC_SETUP_CLSFLOWER)
1003 switch (f->command) {
1004 case FLOW_CLS_REPLACE:
1005 return mlx5_tc_ct_block_flow_offload_add(ft, f);
1006 case FLOW_CLS_DESTROY:
1007 return mlx5_tc_ct_block_flow_offload_del(ft, f);
1008 case FLOW_CLS_STATS:
1009 return mlx5_tc_ct_block_flow_offload_stats(ft, f);
/* Dissect an skb into a CT tuple (TCP/UDP over IPv4/IPv6 only) so the
 * software path can find the offloaded entry.
 * NOTE(review): the zone assignment, dissection-failure check, default case
 * and return statements are elided from this listing. */
1018 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
1021 struct flow_keys flow_keys;
1023 skb_reset_network_header(skb);
1024 skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
1028 if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
1029 flow_keys.basic.ip_proto != IPPROTO_UDP)
1032 tuple->port.src = flow_keys.ports.src;
1033 tuple->port.dst = flow_keys.ports.dst;
1034 tuple->n_proto = flow_keys.basic.n_proto;
1035 tuple->ip_proto = flow_keys.basic.ip_proto;
1037 switch (flow_keys.basic.n_proto) {
1038 case htons(ETH_P_IP):
1039 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1040 tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
1041 tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
1044 case htons(ETH_P_IPV6):
1045 tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1046 tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
1047 tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
/* Add a "-trk" (not tracked) match on the ctstate register: extend the
 * existing register match with the TRK bit required clear.
 * NOTE(review): the consistency check on the pre-existing match and the
 * return are elided from this listing. */
1059 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
1061 u32 ctstate = 0, ctstate_mask = 0;
1063 mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
1064 &ctstate, &ctstate_mask);
1068 ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
1069 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1070 ctstate, ctstate_mask);
/* Release the labels mapping taken by mlx5_tc_ct_match_add, if any.
 * Safe to call when CT offload is unsupported (priv == NULL) or no labels
 * id was allocated (id 0 is the "none" sentinel here). */
1075 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
1077 if (!priv || !ct_attr->ct_labels_id)
1080 mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id);
/* Translate a clsflower ct_state/zone/mark/labels match into register
 * matches: validate that only trk/est/new flags are used, reject +new
 * (no HW support), then program ZONE/CTSTATE/MARK register matches and map
 * the 128-bit labels to a compact id matched via LABELS_TO_REG.
 * NOTE(review): key/mask assignment from match, the ct_labels declaration
 * and several returns are elided from this listing. */
1084 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
1085 struct mlx5_flow_spec *spec,
1086 struct flow_cls_offload *f,
1087 struct mlx5_ct_attr *ct_attr,
1088 struct netlink_ext_ack *extack)
1090 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1091 struct flow_dissector_key_ct *mask, *key;
1092 bool trk, est, untrk, unest, new;
1093 u32 ctstate = 0, ctstate_mask = 0;
1094 u16 ct_state_on, ct_state_off;
1095 u16 ct_state, ct_state_mask;
1096 struct flow_match_ct match;
1099 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
1103 NL_SET_ERR_MSG_MOD(extack,
1104 "offload of ct matching isn't available");
1108 flow_rule_match_ct(rule, &match);
1113 ct_state = key->ct_state;
1114 ct_state_mask = mask->ct_state;
1116 if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
1117 TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
1118 TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
1119 NL_SET_ERR_MSG_MOD(extack,
1120 "only ct_state trk, est and new are supported for offload");
/* "on" bits are required set; "off" bits are required clear. */
1124 ct_state_on = ct_state & ct_state_mask;
1125 ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
1126 trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1127 new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1128 est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1129 untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1130 unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1132 ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
1133 ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1134 ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
1135 ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
/* presumably guarded by "if (new)" — the condition line is elided */
1138 NL_SET_ERR_MSG_MOD(extack,
1139 "matching on ct_state +new isn't supported");
1144 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1145 key->ct_zone, MLX5_CT_ZONE_MASK);
1147 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1148 ctstate, ctstate_mask);
1150 mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
1151 key->ct_mark, mask->ct_mark);
1152 if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
1153 mask->ct_labels[3]) {
1154 ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
1155 ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
1156 ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
1157 ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
1158 if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
1160 mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
1161 MLX5_CT_LABELS_MASK);
/* Record a TC ct action's parameters (zone, action flags, nf flowtable) on
 * the flow attr for later CT-table setup; errors out when CT offload is
 * unavailable. NOTE(review): the guard condition and returns are elided. */
1168 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
1169 struct mlx5_flow_attr *attr,
1170 const struct flow_action_entry *act,
1171 struct netlink_ext_ack *extack)
1174 NL_SET_ERR_MSG_MOD(extack,
1175 "offload of ct action isn't available");
1179 attr->ct_attr.zone = act->ct.zone;
1180 attr->ct_attr.ct_action = act->ct.action;
1181 attr->ct_attr.nf_ft = act->ct.flow_table;
/* Populate a pre-CT table with its two rules: a "hit" rule matching
 * zone + trk(+nat) ctstate that forwards straight to post_ct, and a
 * catch-all miss rule forwarding to the ct (or ct_nat) table; both share a
 * mod-hdr that writes the zone register. NOTE(review): the bool nat
 * parameter, IS_ERR checks on rules and goto labels are elided. */
1186 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1187 struct mlx5_tc_ct_pre *pre_ct,
1190 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1191 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1192 struct mlx5_core_dev *dev = ct_priv->dev;
1193 struct mlx5_flow_table *ft = pre_ct->ft;
1194 struct mlx5_flow_destination dest = {};
1195 struct mlx5_flow_act flow_act = {};
1196 struct mlx5_modify_hdr *mod_hdr;
1197 struct mlx5_flow_handle *rule;
1198 struct mlx5_flow_spec *spec;
1203 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1207 zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1208 err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
1211 ct_dbg("Failed to set zone register mapping");
1215 mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
1216 pre_mod_acts.num_actions,
1217 pre_mod_acts.actions);
1219 if (IS_ERR(mod_hdr)) {
1220 err = PTR_ERR(mod_hdr);
1221 ct_dbg("Failed to create pre ct mod hdr");
1224 pre_ct->modify_hdr = mod_hdr;
1226 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1227 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1228 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1229 flow_act.modify_hdr = mod_hdr;
1230 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
/* hit rule: already-tracked traffic in this zone skips the CT tables */
1233 mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1234 zone, MLX5_CT_ZONE_MASK);
1235 ctstate = MLX5_CT_STATE_TRK_BIT;
1237 ctstate |= MLX5_CT_STATE_NAT_BIT;
1238 mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
1240 dest.ft = ct_priv->post_ct;
1241 rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1243 err = PTR_ERR(rule);
1244 ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1247 pre_ct->flow_rule = rule;
/* miss rule: empty spec matches everything else, go to ct/ct_nat */
1250 memset(spec, 0, sizeof(*spec));
1251 dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1252 rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1254 err = PTR_ERR(rule);
1255 ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1258 pre_ct->miss_rule = rule;
1260 dealloc_mod_hdr_actions(&pre_mod_acts);
/* error unwind */
1265 mlx5_del_flow_rules(pre_ct->flow_rule);
1267 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1269 dealloc_mod_hdr_actions(&pre_mod_acts);
/* Delete both pre-CT rules and free their shared modify-header. */
1275 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1276 struct mlx5_tc_ct_pre *pre_ct)
1278 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1279 struct mlx5_core_dev *dev = ct_priv->dev;
1281 mlx5_del_flow_rules(pre_ct->flow_rule);
1282 mlx5_del_flow_rules(pre_ct->miss_rule);
1283 mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
/* Build one pre-CT table (2 FTEs): create the unmanaged table in the right
 * namespace/prio, a metadata_reg_c_2 match group for FTE 0 (zone + ctstate
 * bits in the upper 16), a match-all miss group for FTE 1, then install the
 * rules. NOTE(review): bool nat parameter, ENOMEM/IS_ERR checks and goto
 * labels are elided from this listing. */
1287 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1288 struct mlx5_tc_ct_pre *pre_ct,
1291 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1292 struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1293 struct mlx5_core_dev *dev = ct_priv->dev;
1294 struct mlx5_flow_table_attr ft_attr = {};
1295 struct mlx5_flow_namespace *ns;
1296 struct mlx5_flow_table *ft;
1297 struct mlx5_flow_group *g;
1298 u32 metadata_reg_c_2_mask;
1303 ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1306 ct_dbg("Failed to get flow namespace");
1310 flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1314 ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1315 ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
1316 FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1317 ft_attr.max_fte = 2;
1319 ft = mlx5_create_flow_table(ns, &ft_attr);
1322 ct_dbg("Failed to create pre ct table");
1327 /* create flow group */
1328 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1329 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1330 MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1331 MLX5_MATCH_MISC_PARAMETERS_2);
1333 misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1334 match_criteria.misc_parameters_2);
/* zone in the low 16 bits, ctstate trk(+nat) bits shifted into the high */
1336 metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1337 metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1339 metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1341 MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1342 metadata_reg_c_2_mask);
1344 g = mlx5_create_flow_group(ft, flow_group_in);
1347 ct_dbg("Failed to create pre ct group");
1350 pre_ct->flow_grp = g;
1352 /* create miss group */
1353 memset(flow_group_in, 0, inlen);
1354 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1355 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1356 g = mlx5_create_flow_group(ft, flow_group_in);
1359 ct_dbg("Failed to create pre ct miss group");
1362 pre_ct->miss_grp = g;
1364 err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1368 kvfree(flow_group_in);
/* error unwind: destroy groups/table, free the group input buffer */
1372 mlx5_destroy_flow_group(pre_ct->miss_grp);
1374 mlx5_destroy_flow_group(pre_ct->flow_grp);
1376 mlx5_destroy_flow_table(ft);
1378 kvfree(flow_group_in);
/* mlx5_tc_ct_free_pre_ct() - full teardown of one pre_ct table:
 * rules first, then both flow groups, then the table itself —
 * exact reverse of mlx5_tc_ct_alloc_pre_ct().
 */
1383 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1384 struct mlx5_tc_ct_pre *pre_ct)
1386 tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1387 mlx5_destroy_flow_group(pre_ct->miss_grp);
1388 mlx5_destroy_flow_group(pre_ct->flow_grp);
1389 mlx5_destroy_flow_table(pre_ct->ft);
/* mlx5_tc_ct_alloc_pre_ct_tables() - create the per-zone pair of pre-CT
 * tables: the plain one (nat=false) and the NAT one (nat=true).  On
 * failure of the NAT table the plain one is freed (err_pre_ct_nat path).
 * NOTE(review): the `if (err)` checks and return statements are elided
 * from this listing.
 */
1393 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1397 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1401 err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1403 goto err_pre_ct_nat;
1408 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
/* mlx5_tc_ct_free_pre_ct_tables() - free both per-zone pre-CT tables,
 * NAT table first (reverse of allocation order).
 */
1413 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1415 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1416 mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
/* mlx5_tc_ct_add_ft_cb() - get-or-create the per-zone ct flow-table
 * object and register for conntrack offload events on @nf_ft.
 *
 * Fast path: if a zone entry already exists in zone_ht, take a reference
 * and (on the elided line) return it.  Slow path: allocate, map the zone
 * to a restore id, build the pre-CT tables, init the per-zone tuple
 * hashtable, insert into zone_ht, then hook
 * mlx5_tc_ct_block_flow_offload() as the nf flowtable offload callback.
 *
 * NOTE(review): error checks, unwind labels and the success return are
 * elided; the visible unwind calls run in reverse creation order.
 * Caller presumably holds ct_priv->control_lock — confirm at call sites.
 */
1419 static struct mlx5_ct_ft *
1420 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1421 struct nf_flowtable *nf_ft)
1423 struct mlx5_ct_ft *ft;
1426 ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1428 refcount_inc(&ft->refcount);
1432 ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1434 return ERR_PTR(-ENOMEM);
/* zone_restore_id is what HW writes so SW can recover the zone on miss. */
1436 err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1442 ft->ct_priv = ct_priv;
1443 refcount_set(&ft->refcount, 1);
1445 err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1447 goto err_alloc_pre_ct;
1449 err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1453 err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1458 err = nf_flow_table_offload_add_cb(ft->nf_ft,
1459 mlx5_tc_ct_block_flow_offload, ft);
/* Error unwind (labels elided): reverse of creation order. */
1466 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1468 rhashtable_destroy(&ft->ct_entries_ht);
1470 mlx5_tc_ct_free_pre_ct_tables(ft);
1472 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1475 return ERR_PTR(err);
/* mlx5_tc_ct_flush_ft_entry() - per-element callback used by
 * rhashtable_free_and_destroy() in mlx5_tc_ct_del_ft_cb(); deletes one
 * offloaded conntrack entry.  @arg carries ct_priv, @ptr the entry.
 */
1479 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1481 struct mlx5_tc_ct_priv *ct_priv = arg;
1482 struct mlx5_ct_entry *entry = ptr;
1484 mlx5_tc_ct_del_ft_entry(ct_priv, entry);
/* mlx5_tc_ct_del_ft_cb() - drop one reference on a per-zone ft; on the
 * last reference, unregister the nf flowtable offload callback, unlink
 * the zone from zone_ht, flush every remaining offloaded tuple
 * (mlx5_tc_ct_flush_ft_entry), free the pre-CT tables and release the
 * zone restore-id mapping.
 * NOTE(review): the final kfree(ft) is elided from this listing —
 * presumably present; verify against the full source.
 */
1489 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1491 if (!refcount_dec_and_test(&ft->refcount))
1494 nf_flow_table_offload_del_cb(ft->nf_ft,
1495 mlx5_tc_ct_block_flow_offload, ft);
1496 rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1497 rhashtable_free_and_destroy(&ft->ct_entries_ht,
1498 mlx5_tc_ct_flush_ft_entry,
1500 mlx5_tc_ct_free_pre_ct_tables(ft);
1501 mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1505 /* We translate the tc filter with CT action to the following HW model:
1507 * +---------------------+
1508 * + ft prio (tc chain) +
1509 * + original match +
1510 * +---------------------+
1511 * | set chain miss mapping
1516 * +---------------------+
1517 * + pre_ct/pre_ct_nat + if matches +---------------------+
1518 * + zone+nat match +---------------->+ post_ct (see below) +
1519 * +---------------------+ set zone +---------------------+
1522 * +--------------------+
1523 * + CT (nat or no nat) +
1524 * + tuple + zone match +
1525 * +--------------------+
1529 * | set zone_restore
1530 * | do nat (if needed)
1533 * + post_ct + original filter actions
1534 * + fte_id match +------------------------>
/* __mlx5_tc_ct_flow_offload() - split a tc rule with a CT action into the
 * two-rule HW model described in the comment above this function:
 *
 *   pre_ct rule : original match, actions reduced to DECAP (if it was
 *                 there) + MOD_HDR + FWD, destination = the zone's
 *                 pre_ct/pre_ct_nat table (nat chosen from ct_action);
 *   post_ct rule: matches the allocated fte_id in the post_ct table and
 *                 carries the original rule's actions (minus DECAP).
 *
 * The pre-rule's modify header writes the chain miss mapping
 * (CHAIN_TO_REG), the fte_id (FTEID_TO_REG) and, for decap flows, the
 * tunnel mapping, so state survives the trip through the CT tables.
 *
 * Returns the pre_ct rule handle; stores everything needed for teardown
 * in ct_flow, published via attr->ct_attr.ct_flow.
 *
 * NOTE(review): this listing is elided — most `if (err)`/IS_ERR checks,
 * goto labels, a few declarations (fte_id, err_* labels) and the success
 * return are not visible.  The visible error-unwind sequence is the
 * reverse of the setup sequence, consistent with goto-chain cleanup.
 * Caller presumably holds ct_priv->control_lock (taken in
 * mlx5_tc_ct_flow_offload()).
 */
1537 static struct mlx5_flow_handle *
1538 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1539 struct mlx5e_tc_flow *flow,
1540 struct mlx5_flow_spec *orig_spec,
1541 struct mlx5_flow_attr *attr)
1543 bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1544 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1545 struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1546 u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1547 struct mlx5_flow_spec *post_ct_spec = NULL;
1548 struct mlx5_flow_attr *pre_ct_attr;
1549 struct mlx5_modify_hdr *mod_hdr;
1550 struct mlx5_flow_handle *rule;
1551 struct mlx5_ct_flow *ct_flow;
1552 int chain_mapping = 0, err;
1553 struct mlx5_ct_ft *ft;
1556 post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
1557 ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1558 if (!post_ct_spec || !ct_flow) {
/* NOTE(review): only post_ct_spec is freed here; the elided line is
 * presumably kfree(ct_flow) — verify against the full source. */
1559 kfree(post_ct_spec);
1561 return ERR_PTR(-ENOMEM);
1564 /* Register for CT established events */
1565 ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1566 attr->ct_attr.nf_ft);
1569 ct_dbg("Failed to register to ft callback");
/* fte_id links the pre rule to its post_ct match; bounded by the
 * register width (MLX5_FTE_ID_MAX). */
1574 err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
1575 MLX5_FTE_ID_MAX, GFP_KERNEL);
1577 netdev_warn(priv->netdev,
1578 "Failed to allocate fte id, err: %d\n", err);
1581 ct_flow->fte_id = fte_id;
1583 /* Base flow attributes of both rules on original rule attribute */
1584 ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1585 if (!ct_flow->pre_ct_attr) {
1590 ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1591 if (!ct_flow->post_ct_attr) {
1593 goto err_alloc_post;
1596 pre_ct_attr = ct_flow->pre_ct_attr;
1597 memcpy(pre_ct_attr, attr, attr_sz);
1598 memcpy(ct_flow->post_ct_attr, attr, attr_sz);
1600 /* Modify the original rule's action to fwd and modify, leave decap */
1601 pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
1602 pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1603 MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1605 /* Write chain miss tag for miss in ct table as we
1606 * don't go though all prios of this chain as normal tc rules
1609 err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
1612 ct_dbg("Failed to get chain register mapping for chain");
1615 ct_flow->chain_mapping = chain_mapping;
1617 err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1618 CHAIN_TO_REG, chain_mapping);
1620 ct_dbg("Failed to set chain register mapping");
1624 err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1625 FTEID_TO_REG, fte_id);
1627 ct_dbg("Failed to set fte_id register mapping");
1631 /* If original flow is decap, we do it before going into ct table
1632 * so add a rewrite for the tunnel match_id.
1634 if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
1636 u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
1638 err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
1643 ct_dbg("Failed to set tunnel register mapping");
/* Commit the accumulated register writes into one modify-header obj. */
1648 mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1649 pre_mod_acts.num_actions,
1650 pre_mod_acts.actions);
1651 if (IS_ERR(mod_hdr)) {
1652 err = PTR_ERR(mod_hdr);
1653 ct_dbg("Failed to create pre ct mod hdr");
1656 pre_ct_attr->modify_hdr = mod_hdr;
1658 /* Post ct rule matches on fte_id and executes original rule's
1661 mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
1662 fte_id, MLX5_FTE_ID_MASK);
1664 /* Put post_ct rule on post_ct flow table */
1665 ct_flow->post_ct_attr->chain = 0;
1666 ct_flow->post_ct_attr->prio = 0;
1667 ct_flow->post_ct_attr->ft = ct_priv->post_ct;
/* Packet was already parsed/decapped before CT, so no L2-L4 match
 * levels and no DECAP action on the post rule. */
1669 ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
1670 ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
1671 ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
1672 rule = mlx5_tc_rule_insert(priv, post_ct_spec,
1673 ct_flow->post_ct_attr);
1674 ct_flow->post_ct_rule = rule;
1675 if (IS_ERR(ct_flow->post_ct_rule)) {
1676 err = PTR_ERR(ct_flow->post_ct_rule);
1677 ct_dbg("Failed to add post ct rule");
1678 goto err_insert_post_ct;
1681 /* Change original rule point to ct table */
1682 pre_ct_attr->dest_chain = 0;
1683 pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
1684 ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
1686 if (IS_ERR(ct_flow->pre_ct_rule)) {
1687 err = PTR_ERR(ct_flow->pre_ct_rule);
1688 ct_dbg("Failed to add pre ct rule");
1689 goto err_insert_orig;
/* Success: publish ct_flow and free temporaries (mod_acts were copied
 * into the firmware modify-header object above). */
1692 attr->ct_attr.ct_flow = ct_flow;
1693 dealloc_mod_hdr_actions(&pre_mod_acts);
1694 kfree(post_ct_spec);
/* Error unwind (labels elided): reverse of the setup order. */
1699 mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1700 ct_flow->post_ct_attr);
1702 mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1704 dealloc_mod_hdr_actions(&pre_mod_acts);
1705 mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1707 kfree(ct_flow->post_ct_attr);
1709 kfree(ct_flow->pre_ct_attr);
1711 idr_remove(&ct_priv->fte_ids, fte_id);
1713 mlx5_tc_ct_del_ft_cb(ct_priv, ft);
1715 kfree(post_ct_spec);
1717 netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
1718 return ERR_PTR(err);
/* __mlx5_tc_ct_flow_offload_clear() - offload a "ct clear" action as a
 * single rule: zero all CT registers (mlx5_tc_ct_entry_set_registers with
 * all-zero args), attach the resulting modify header to a copy of the
 * original attrs, and insert the rule with the original spec.  No
 * post_ct rule, fte_id or per-zone ft is needed, so ct_flow records only
 * pre_ct_attr/pre_ct_rule.
 *
 * NOTE(review): elided lines include the NULL check after kzalloc, the
 * error path after mlx5_alloc_flow_attr, the IS_ERR(rule) check, the
 * success return, and final cleanup (kfree of pre_ct_attr/ct_flow and
 * dealloc_mod_hdr_actions) — verify against the full source.
 */
1721 static struct mlx5_flow_handle *
1722 __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
1723 struct mlx5_flow_spec *orig_spec,
1724 struct mlx5_flow_attr *attr,
1725 struct mlx5e_tc_mod_hdr_acts *mod_acts)
1727 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1728 u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1729 struct mlx5_flow_attr *pre_ct_attr;
1730 struct mlx5_modify_hdr *mod_hdr;
1731 struct mlx5_flow_handle *rule;
1732 struct mlx5_ct_flow *ct_flow;
1735 ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1737 return ERR_PTR(-ENOMEM);
1739 /* Base esw attributes on original rule attribute */
1740 pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1746 memcpy(pre_ct_attr, attr, attr_sz);
/* Zeroing ct_state/zone/mark/labels registers implements "ct clear". */
1748 err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
1750 ct_dbg("Failed to set register for ct clear");
1751 goto err_set_registers;
1754 mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1755 mod_acts->num_actions,
1757 if (IS_ERR(mod_hdr)) {
1758 err = PTR_ERR(mod_hdr);
1759 ct_dbg("Failed to add create ct clear mod hdr")
1760 goto err_set_registers;
/* Actions were committed to the mod_hdr object; free the staging list. */
1763 dealloc_mod_hdr_actions(mod_acts);
1764 pre_ct_attr->modify_hdr = mod_hdr;
1765 pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1767 rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
1769 err = PTR_ERR(rule);
1770 ct_dbg("Failed to add ct clear rule");
1774 attr->ct_attr.ct_flow = ct_flow;
1775 ct_flow->pre_ct_attr = pre_ct_attr;
1776 ct_flow->pre_ct_rule = rule;
/* Error unwind (labels elided). */
1780 mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
1782 netdev_warn(priv->netdev,
1783 "Failed to offload ct clear flow, err %d\n", err);
1788 return ERR_PTR(err);
/* mlx5_tc_ct_flow_offload() - public entry point for offloading a tc
 * flow with a CT action.  Dispatches, under control_lock, to the
 * "ct clear" path (TCA_CT_ACT_CLEAR) or the full pre_ct/post_ct split.
 * NOTE(review): the condition guarding the -EOPNOTSUPP return and the
 * final `return rule;` are elided from this listing.
 */
1791 struct mlx5_flow_handle *
1792 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
1793 struct mlx5e_tc_flow *flow,
1794 struct mlx5_flow_spec *spec,
1795 struct mlx5_flow_attr *attr,
1796 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
1798 bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
1799 struct mlx5_flow_handle *rule;
1802 return ERR_PTR(-EOPNOTSUPP);
/* Serialize all ct flow add/del against each other (see control_lock). */
1804 mutex_lock(&priv->control_lock);
1807 rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
1809 rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
1810 mutex_unlock(&priv->control_lock);
/* __mlx5_tc_ct_delete_flow() - undo __mlx5_tc_ct_flow_offload() /
 * __mlx5_tc_ct_flow_offload_clear().  Always removes the pre rule and
 * its modify header; the post_ct_rule branch handles the non-clear case
 * only (clear flows never set post_ct_rule), releasing the chain
 * mapping, fte_id and per-zone ft reference as well.
 * NOTE(review): kfree(ct_flow) after the attr frees is elided —
 * presumably present; verify against the full source.
 */
1816 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
1817 struct mlx5e_tc_flow *flow,
1818 struct mlx5_ct_flow *ct_flow)
1820 struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
1821 struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1823 mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
1825 mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1827 if (ct_flow->post_ct_rule) {
1828 mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1829 ct_flow->post_ct_attr);
1830 mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1831 idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
1832 mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
1835 kfree(ct_flow->pre_ct_attr);
1836 kfree(ct_flow->post_ct_attr);
/* mlx5_tc_ct_delete_flow() - public teardown entry point; takes
 * control_lock and delegates to __mlx5_tc_ct_delete_flow().
 * NOTE(review): the early return for a NULL ct_flow (the case the
 * "called on error ... parsing" comment refers to) is elided from this
 * listing.
 */
1841 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
1842 struct mlx5e_tc_flow *flow,
1843 struct mlx5_flow_attr *attr)
1845 struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
1847 /* We are called on error to clean up stuff from parsing
1848 * but we don't have anything for now
1853 mutex_lock(&priv->control_lock);
1854 __mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
1855 mutex_unlock(&priv->control_lock);
/* mlx5_tc_ct_init_check_esw_support() - verify the FDB (eswitch) has
 * every firmware capability CT offload depends on: ignore_flow_level,
 * vlan actions, modify-header-then-forward-to-table, and reg_c1
 * loopback.  On failure, *err_msg names the missing capability.
 * NOTE(review): the return statements after each check (and the final
 * success return) are elided from this listing.
 */
1859 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
1860 const char **err_msg)
1862 if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
1863 *err_msg = "firmware level support is missing";
1867 if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
1868 /* vlan workaround should be avoided for multi chain rules.
1869 * This is just a sanity check as pop vlan action should
1870 * be supported by any FW that supports ignore_flow_level
1873 *err_msg = "firmware vlan actions support is missing";
1877 if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
1878 fdb_modify_header_fwd_to_table)) {
1879 /* CT always writes to registers which are mod header actions.
1880 * Therefore, mod header and goto is required
1883 *err_msg = "firmware fwd and modify support is missing";
1887 if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
1888 *err_msg = "register loopback isn't supported";
/* mlx5_tc_ct_init_check_nic_support() - NIC-mode counterpart of the esw
 * check above; only ignore_flow_level on the NIC RX flow table is
 * required.  Return statements are elided from this listing.
 */
1896 mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
1897 const char **err_msg)
1899 if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
1900 *err_msg = "firmware level support is missing";
/* mlx5_tc_ct_init_check_support() - top-level capability gate for CT
 * offload.  Without CONFIG_NET_TC_SKB_EXT the chain id cannot be
 * restored to software on a HW miss, so CT offload is refused outright;
 * otherwise dispatch to the FDB or NIC specific check by ns_type.
 */
1908 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
1909 enum mlx5_flow_namespace_type ns_type,
1910 const char **err_msg)
1912 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1914 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1915 /* cannot restore chain ID on HW miss */
1917 *err_msg = "tc skb extension missing";
1920 if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
1921 return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
1923 return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
1926 #define INIT_ERR_PREFIX "tc ct offload init failed"
/* mlx5_tc_ct_init() - allocate and initialize the CT offload context.
 *
 * Order: capability check -> ct_priv allocation -> zone mapping (u16
 * zone -> restore id) -> labels mapping (4 x u32 ct labels -> id) ->
 * three global chains tables (ct, ct_nat, post_ct) -> fte_id idr, the
 * two mutexes, and the zone/tuple/tuple-nat rhashtables.
 *
 * NOTE(review): elided lines include the error check after
 * mlx5_tc_ct_init_check_support(), the NULL check after kzalloc, the
 * goto into the error chain for each table failure, the success
 * `return ct_priv`, the remaining unwind labels (post_ct table,
 * kfree(ct_priv)) and the final `return NULL` — verify against the full
 * source.
 */
1928 struct mlx5_tc_ct_priv *
1929 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
1930 struct mod_hdr_tbl *mod_hdr,
1931 enum mlx5_flow_namespace_type ns_type)
1933 struct mlx5_tc_ct_priv *ct_priv;
1934 struct mlx5_core_dev *dev;
1939 err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
1942 "tc ct offload not supported, %s\n",
1947 ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
/* Zone ids are u16; "true" presumably enables id reuse/refcounting in
 * the mapping — confirm against mapping_create() docs. */
1951 ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
1952 if (IS_ERR(ct_priv->zone_mapping)) {
1953 err = PTR_ERR(ct_priv->zone_mapping);
1954 goto err_mapping_zone;
/* Conntrack labels are 128 bits (4 x u32), compressed to a mapped id. */
1957 ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
1958 if (IS_ERR(ct_priv->labels_mapping)) {
1959 err = PTR_ERR(ct_priv->labels_mapping);
1960 goto err_mapping_labels;
1963 ct_priv->ns_type = ns_type;
1964 ct_priv->chains = chains;
1965 ct_priv->netdev = priv->netdev;
1966 ct_priv->dev = priv->mdev;
1967 ct_priv->mod_hdr_tbl = mod_hdr;
1968 ct_priv->ct = mlx5_chains_create_global_table(chains);
1969 if (IS_ERR(ct_priv->ct)) {
1970 err = PTR_ERR(ct_priv->ct);
1972 "%s, failed to create ct table err: %d\n",
1973 INIT_ERR_PREFIX, err);
1977 ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
1978 if (IS_ERR(ct_priv->ct_nat)) {
1979 err = PTR_ERR(ct_priv->ct_nat);
1981 "%s, failed to create ct nat table err: %d\n",
1982 INIT_ERR_PREFIX, err);
1983 goto err_ct_nat_tbl;
1986 ct_priv->post_ct = mlx5_chains_create_global_table(chains);
1987 if (IS_ERR(ct_priv->post_ct)) {
1988 err = PTR_ERR(ct_priv->post_ct);
1990 "%s, failed to create post ct table err: %d\n",
1991 INIT_ERR_PREFIX, err);
1992 goto err_post_ct_tbl;
1995 idr_init(&ct_priv->fte_ids);
1996 mutex_init(&ct_priv->control_lock);
1997 mutex_init(&ct_priv->shared_counter_lock);
1998 rhashtable_init(&ct_priv->zone_ht, &zone_params);
1999 rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
2000 rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
/* Error unwind (labels elided): reverse of creation order. */
2005 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2007 mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2009 mapping_destroy(ct_priv->labels_mapping);
2011 mapping_destroy(ct_priv->zone_mapping);
/* mlx5_tc_ct_clean() - tear down everything mlx5_tc_ct_init() built:
 * the three global chains tables, both mappings, the rhashtables, the
 * mutexes and the fte_id idr.
 * NOTE(review): the NULL ct_priv guard and the final kfree(ct_priv) are
 * elided from this listing — verify against the full source.
 */
2021 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
2023 struct mlx5_fs_chains *chains;
2028 chains = ct_priv->chains;
2030 mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
2031 mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2032 mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2033 mapping_destroy(ct_priv->zone_mapping);
2034 mapping_destroy(ct_priv->labels_mapping);
2036 rhashtable_destroy(&ct_priv->ct_tuples_ht);
2037 rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2038 rhashtable_destroy(&ct_priv->zone_ht);
2039 mutex_destroy(&ct_priv->control_lock);
2040 mutex_destroy(&ct_priv->shared_counter_lock);
2041 idr_destroy(&ct_priv->fte_ids);
2046 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
2047 struct sk_buff *skb, u8 zone_restore_id)
2049 struct mlx5_ct_tuple tuple = {};
2050 struct mlx5_ct_entry *entry;
2053 if (!ct_priv || !zone_restore_id)
2056 if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
2059 if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
2062 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple,
2065 entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
2066 &tuple, tuples_nat_ht_params);
2070 tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);