drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>

#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"

#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)

#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX

#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)

#define ct_dbg(fmt, args...)\
        netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

struct mlx5_tc_ct_priv {
        struct mlx5_core_dev *dev;
        const struct net_device *netdev;
        struct mod_hdr_tbl *mod_hdr_tbl;
        struct idr fte_ids;
        struct xarray tuple_ids;
        struct rhashtable zone_ht;
        struct rhashtable ct_tuples_ht;
        struct rhashtable ct_tuples_nat_ht;
        struct mlx5_flow_table *ct;
        struct mlx5_flow_table *ct_nat;
        struct mlx5_flow_table *post_ct;
        struct mutex control_lock; /* guards parallel adds/dels */
        struct mutex shared_counter_lock;
        struct mapping_ctx *zone_mapping;
        struct mapping_ctx *labels_mapping;
        enum mlx5_flow_namespace_type ns_type;
        struct mlx5_fs_chains *chains;
};

struct mlx5_ct_flow {
        struct mlx5_flow_attr *pre_ct_attr;
        struct mlx5_flow_attr *post_ct_attr;
        struct mlx5_flow_handle *pre_ct_rule;
        struct mlx5_flow_handle *post_ct_rule;
        struct mlx5_ct_ft *ft;
        u32 fte_id;
        u32 chain_mapping;
};

struct mlx5_ct_zone_rule {
        struct mlx5_flow_handle *rule;
        struct mlx5e_mod_hdr_handle *mh;
        struct mlx5_flow_attr *attr;
        bool nat;
};

struct mlx5_tc_ct_pre {
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *flow_grp;
        struct mlx5_flow_group *miss_grp;
        struct mlx5_flow_handle *flow_rule;
        struct mlx5_flow_handle *miss_rule;
        struct mlx5_modify_hdr *modify_hdr;
};

struct mlx5_ct_ft {
        struct rhash_head node;
        u16 zone;
        u32 zone_restore_id;
        refcount_t refcount;
        struct nf_flowtable *nf_ft;
        struct mlx5_tc_ct_priv *ct_priv;
        struct rhashtable ct_entries_ht;
        struct mlx5_tc_ct_pre pre_ct;
        struct mlx5_tc_ct_pre pre_ct_nat;
};

struct mlx5_ct_tuple {
        u16 addr_type;
        __be16 n_proto;
        u8 ip_proto;
        struct {
                union {
                        __be32 src_v4;
                        struct in6_addr src_v6;
                };
                union {
                        __be32 dst_v4;
                        struct in6_addr dst_v6;
                };
        } ip;
        struct {
                __be16 src;
                __be16 dst;
        } port;

        u16 zone;
};

struct mlx5_ct_counter {
        struct mlx5_fc *counter;
        refcount_t refcount;
        bool is_shared;
};

struct mlx5_ct_entry {
        struct rhash_head node;
        struct rhash_head tuple_node;
        struct rhash_head tuple_nat_node;
        struct mlx5_ct_counter *counter;
        unsigned long cookie;
        unsigned long restore_cookie;
        struct mlx5_ct_tuple tuple;
        struct mlx5_ct_tuple tuple_nat;
        struct mlx5_ct_zone_rule zone_rules[2];
};

static const struct rhashtable_params cts_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, node),
        .key_offset = offsetof(struct mlx5_ct_entry, cookie),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
        .head_offset = offsetof(struct mlx5_ct_ft, node),
        .key_offset = offsetof(struct mlx5_ct_ft, zone),
        .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
        .automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
        .key_offset = offsetof(struct mlx5_ct_entry, tuple),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
        .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

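/* A ct entry is inserted into ct_tuples_nat_ht only when its NAT tuple
 * differs from the original tuple (see mlx5_tc_ct_block_flow_offload_add()),
 * so a hashed tuple_nat_node doubles as the "this entry does NAT" flag.
 */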
static bool
mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
{
        return !!(entry->tuple_nat_node.next);
}

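/* Extract the tuple (ethertype, L4 protocol, IPv4/IPv6 addresses and
 * TCP/UDP ports) from a conntrack flow_rule. Any other address or
 * protocol combination is rejected with -EOPNOTSUPP.
 */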
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
        struct flow_match_control control;
        struct flow_match_basic basic;

        flow_rule_match_basic(rule, &basic);
        flow_rule_match_control(rule, &control);

        tuple->n_proto = basic.key->n_proto;
        tuple->ip_proto = basic.key->ip_proto;
        tuple->addr_type = control.key->addr_type;

        if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_match_ipv4_addrs match;

                flow_rule_match_ipv4_addrs(rule, &match);
                tuple->ip.src_v4 = match.key->src;
                tuple->ip.dst_v4 = match.key->dst;
        } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_match_ipv6_addrs match;

                flow_rule_match_ipv6_addrs(rule, &match);
                tuple->ip.src_v6 = match.key->src;
                tuple->ip.dst_v6 = match.key->dst;
        } else {
                return -EOPNOTSUPP;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;

                flow_rule_match_ports(rule, &match);
                switch (tuple->ip_proto) {
                case IPPROTO_TCP:
                case IPPROTO_UDP:
                        tuple->port.src = match.key->src;
                        tuple->port.dst = match.key->dst;
                        break;
                default:
                        return -EOPNOTSUPP;
                }
        } else {
                return -EOPNOTSUPP;
        }

        return 0;
}

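/* Derive the post-NAT tuple by replaying the rule's mangle actions on a
 * copy of the original tuple. IPv6 addresses are mangled 32 bits at a
 * time, hence the word-offset arithmetic below.
 */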
static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
                             struct flow_rule *rule)
{
        struct flow_action *flow_action = &rule->action;
        struct flow_action_entry *act;
        u32 offset, val, ip6_offset;
        int i;

        flow_action_for_each(i, act, flow_action) {
                if (act->id != FLOW_ACTION_MANGLE)
                        continue;

                offset = act->mangle.offset;
                val = act->mangle.val;
                switch (act->mangle.htype) {
                case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
                        if (offset == offsetof(struct iphdr, saddr))
                                tuple->ip.src_v4 = cpu_to_be32(val);
                        else if (offset == offsetof(struct iphdr, daddr))
                                tuple->ip.dst_v4 = cpu_to_be32(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
                        ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
                        ip6_offset /= 4;
                        if (ip6_offset < 4)
                                tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
                        else if (ip6_offset < 8)
                                tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
                        if (offset == offsetof(struct tcphdr, source))
                                tuple->port.src = cpu_to_be16(val);
                        else if (offset == offsetof(struct tcphdr, dest))
                                tuple->port.dst = cpu_to_be16(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
                        if (offset == offsetof(struct udphdr, source))
                                tuple->port.src = cpu_to_be16(val);
                        else if (offset == offsetof(struct udphdr, dest))
                                tuple->port.dst = cpu_to_be16(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}

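/* Translate the flow_rule tuple match (ethertype, IP protocol, addresses,
 * ports and TCP flags) into mlx5 match criteria and values on the outer
 * headers of the flow spec.
 */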
static int
mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
                           struct flow_rule *rule)
{
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);
        u16 addr_type = 0;
        u8 ip_proto = 0;

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
                struct flow_match_basic match;

                flow_rule_match_basic(rule, &match);

                mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
                                       headers_v);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                         match.mask->ip_proto);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
                         match.key->ip_proto);

                ip_proto = match.key->ip_proto;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_match_control match;

                flow_rule_match_control(rule, &match);
                addr_type = match.key->addr_type;
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_match_ipv4_addrs match;

                flow_rule_match_ipv4_addrs(rule, &match);
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &match.mask->src, sizeof(match.mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &match.key->src, sizeof(match.key->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &match.mask->dst, sizeof(match.mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &match.key->dst, sizeof(match.key->dst));
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_match_ipv6_addrs match;

                flow_rule_match_ipv6_addrs(rule, &match);
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &match.mask->src, sizeof(match.mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &match.key->src, sizeof(match.key->src));

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &match.mask->dst, sizeof(match.mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &match.key->dst, sizeof(match.key->dst));
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;

                flow_rule_match_ports(rule, &match);
                switch (ip_proto) {
                case IPPROTO_TCP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_sport, ntohs(match.mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_sport, ntohs(match.key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_dport, ntohs(match.mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_dport, ntohs(match.key->dst));
                        break;

                case IPPROTO_UDP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_sport, ntohs(match.mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_sport, ntohs(match.key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_dport, ntohs(match.mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_dport, ntohs(match.key->dst));
                        break;
                default:
                        break;
                }
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
                struct flow_match_tcp match;

                flow_rule_match_tcp(rule, &match);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
                         ntohs(match.mask->flags));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
                         ntohs(match.key->flags));
        }

        return 0;
}

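/* Release an entry's counter. Shared counters are refcounted and only
 * freed on the last put; dedicated per-entry counters are freed right
 * away.
 */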
static void
mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
{
        if (entry->counter->is_shared &&
            !refcount_dec_and_test(&entry->counter->refcount))
                return;

        mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
        kfree(entry->counter);
}

static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
                          struct mlx5_ct_entry *entry,
                          bool nat)
{
        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
        struct mlx5_flow_attr *attr = zone_rule->attr;

        ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);

        mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
        mlx5e_mod_hdr_detach(ct_priv->dev,
                             ct_priv->mod_hdr_tbl, zone_rule->mh);
        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
        kfree(attr);
}

static void
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
                           struct mlx5_ct_entry *entry)
{
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
}

static struct flow_action_entry *
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
{
        struct flow_action *flow_action = &flow_rule->action;
        struct flow_action_entry *act;
        int i;

        flow_action_for_each(i, act, flow_action) {
                if (act->id == FLOW_ACTION_CT_METADATA)
                        return act;
        }

        return NULL;
}

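/* Program the modify-header actions that store the ct metadata (ct_state
 * bits, mark, labels mapping id and zone restore id) in flow registers,
 * so post_ct can match on it and it can be restored to the skb on a miss.
 */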
static int
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
                               struct mlx5e_tc_mod_hdr_acts *mod_acts,
                               u8 ct_state,
                               u32 mark,
                               u32 labels_id,
                               u8 zone_restore_id)
{
        enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
        struct mlx5_core_dev *dev = ct_priv->dev;
        int err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        CTSTATE_TO_REG, ct_state);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        MARK_TO_REG, mark);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        LABELS_TO_REG, labels_id);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        ZONE_RESTORE_TO_REG, zone_restore_id);
        if (err)
                return err;

        /* Make another copy of zone id in reg_b for
         * NIC rx flows since we don't copy reg_c1 to
         * reg_b upon miss.
         */
        if (ns != MLX5_FLOW_NAMESPACE_FDB) {
                err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                                NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
                if (err)
                        return err;
        }
        return 0;
}

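/* Convert a single pedit mangle action into an mlx5 set_action_in
 * modify-header entry: length 0 is used for the full 32-bit IP address
 * fields, while the 16-bit port fields set an explicit length.
 */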
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
                                   char *modact)
{
        u32 offset = act->mangle.offset, field;

        switch (act->mangle.htype) {
        case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
                MLX5_SET(set_action_in, modact, length, 0);
                if (offset == offsetof(struct iphdr, saddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
                else if (offset == offsetof(struct iphdr, daddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
                MLX5_SET(set_action_in, modact, length, 0);
                if (offset == offsetof(struct ipv6hdr, saddr) + 12)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
                else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
                else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
                else if (offset == offsetof(struct ipv6hdr, saddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
                else if (offset == offsetof(struct ipv6hdr, daddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
                MLX5_SET(set_action_in, modact, length, 16);
                if (offset == offsetof(struct tcphdr, source))
                        field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
                else if (offset == offsetof(struct tcphdr, dest))
                        field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
                MLX5_SET(set_action_in, modact, length, 16);
                if (offset == offsetof(struct udphdr, source))
                        field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
                else if (offset == offsetof(struct udphdr, dest))
                        field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
                else
                        return -EOPNOTSUPP;
                break;

        default:
                return -EOPNOTSUPP;
        }

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, offset, 0);
        MLX5_SET(set_action_in, modact, field, field);
        MLX5_SET(set_action_in, modact, data, act->mangle.val);

        return 0;
}

static int
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
                            struct flow_rule *flow_rule,
                            struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
        struct flow_action *flow_action = &flow_rule->action;
        struct mlx5_core_dev *mdev = ct_priv->dev;
        struct flow_action_entry *act;
        size_t action_size;
        char *modact;
        int err, i;

        action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);

        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
                case FLOW_ACTION_MANGLE: {
                        err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
                                                    mod_acts);
                        if (err)
                                return err;

                        modact = mod_acts->actions +
                                 mod_acts->num_actions * action_size;

                        err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
                        if (err)
                                return err;

                        mod_acts->num_actions++;
                }
                break;

                case FLOW_ACTION_CT_METADATA:
                        /* Handled earlier */
                        continue;
                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}

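/* Build the modify header for a ct entry rule: map the 128-bit ct labels
 * to a 32-bit id, add the NAT rewrite actions when requested, and set the
 * ct metadata registers (trk + est, plus the nat bit for NAT rules).
 */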
static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
                                struct mlx5_flow_attr *attr,
                                struct flow_rule *flow_rule,
                                struct mlx5e_mod_hdr_handle **mh,
                                u8 zone_restore_id, bool nat)
{
        struct mlx5e_tc_mod_hdr_acts mod_acts = {};
        struct flow_action_entry *meta;
        u16 ct_state = 0;
        int err;

        meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
        if (!meta)
                return -EOPNOTSUPP;

        err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels,
                          &attr->ct_attr.ct_labels_id);
        if (err)
                return -EOPNOTSUPP;
        if (nat) {
                err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
                                                  &mod_acts);
                if (err)
                        goto err_mapping;

                ct_state |= MLX5_CT_STATE_NAT_BIT;
        }

        ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
        err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
                                             ct_state,
                                             meta->ct_metadata.mark,
                                             attr->ct_attr.ct_labels_id,
                                             zone_restore_id);
        if (err)
                goto err_mapping;

        *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
                                   ct_priv->mod_hdr_tbl,
                                   ct_priv->ns_type,
                                   &mod_acts);
        if (IS_ERR(*mh)) {
                err = PTR_ERR(*mh);
                goto err_mapping;
        }
        attr->modify_hdr = mlx5e_mod_hdr_get(*mh);

        dealloc_mod_hdr_actions(&mod_acts);
        return 0;

err_mapping:
        dealloc_mod_hdr_actions(&mod_acts);
        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
        return err;
}

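/* Offload one direction of a ct entry: match the tuple and zone in the ct
 * (or ct_nat) table, apply the metadata/NAT modify header, count, and
 * forward to post_ct.
 */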
static int
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
                          struct flow_rule *flow_rule,
                          struct mlx5_ct_entry *entry,
                          bool nat, u8 zone_restore_id)
{
        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
        struct mlx5_flow_spec *spec = NULL;
        struct mlx5_flow_attr *attr;
        int err;

        zone_rule->nat = nat;

        spec = kzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec)
                return -ENOMEM;

        attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
        if (!attr) {
                err = -ENOMEM;
                goto err_attr;
        }

        err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
                                              &zone_rule->mh,
                                              zone_restore_id, nat);
        if (err) {
                ct_dbg("Failed to create ct entry mod hdr");
                goto err_mod_hdr;
        }

        attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
        attr->dest_chain = 0;
        attr->dest_ft = ct_priv->post_ct;
        attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
        attr->outer_match_level = MLX5_MATCH_L4;
        attr->counter = entry->counter->counter;
        attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;

        mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
        mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                    entry->tuple.zone & MLX5_CT_ZONE_MASK,
                                    MLX5_CT_ZONE_MASK);

        zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
        if (IS_ERR(zone_rule->rule)) {
                err = PTR_ERR(zone_rule->rule);
                ct_dbg("Failed to add ct entry rule, nat: %d", nat);
                goto err_rule;
        }

        zone_rule->attr = attr;

        kfree(spec);
        ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);

        return 0;

err_rule:
        mlx5e_mod_hdr_detach(ct_priv->dev,
                             ct_priv->mod_hdr_tbl, zone_rule->mh);
        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
err_mod_hdr:
        kfree(attr);
err_attr:
        kfree(spec);
        return err;
}

static struct mlx5_ct_counter *
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
{
        struct mlx5_ct_counter *counter;
        int ret;

        counter = kzalloc(sizeof(*counter), GFP_KERNEL);
        if (!counter)
                return ERR_PTR(-ENOMEM);

        counter->is_shared = false;
        counter->counter = mlx5_fc_create(ct_priv->dev, true);
        if (IS_ERR(counter->counter)) {
                ct_dbg("Failed to create counter for ct entry");
                ret = PTR_ERR(counter->counter);
                kfree(counter);
                return ERR_PTR(ret);
        }

        return counter;
}

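/* When conntrack accounting is disabled, both directions of a connection
 * can share one counter: look up the reverse tuple and reuse its counter
 * if it is still live, otherwise create a fresh shared counter.
 */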
static struct mlx5_ct_counter *
mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
                              struct mlx5_ct_entry *entry)
{
        struct mlx5_ct_tuple rev_tuple = entry->tuple;
        struct mlx5_ct_counter *shared_counter;
        struct mlx5_ct_entry *rev_entry;
        __be16 tmp_port;
        int ret;

        /* get the reversed tuple */
        tmp_port = rev_tuple.port.src;
        rev_tuple.port.src = rev_tuple.port.dst;
        rev_tuple.port.dst = tmp_port;

        if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                __be32 tmp_addr = rev_tuple.ip.src_v4;

                rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
                rev_tuple.ip.dst_v4 = tmp_addr;
        } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct in6_addr tmp_addr = rev_tuple.ip.src_v6;

                rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
                rev_tuple.ip.dst_v6 = tmp_addr;
        } else {
                return ERR_PTR(-EOPNOTSUPP);
        }

        /* Use the same counter as the reverse direction */
        mutex_lock(&ct_priv->shared_counter_lock);
        rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple,
                                           tuples_ht_params);
        if (rev_entry) {
                if (refcount_inc_not_zero(&rev_entry->counter->refcount)) {
                        mutex_unlock(&ct_priv->shared_counter_lock);
                        return rev_entry->counter;
                }
        }
        mutex_unlock(&ct_priv->shared_counter_lock);

        shared_counter = mlx5_tc_ct_counter_create(ct_priv);
        if (IS_ERR(shared_counter)) {
                ret = PTR_ERR(shared_counter);
                return ERR_PTR(ret);
        }

        shared_counter->is_shared = true;
        refcount_set(&shared_counter->refcount, 1);
        return shared_counter;
}

static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
                           struct flow_rule *flow_rule,
                           struct mlx5_ct_entry *entry,
                           u8 zone_restore_id)
{
        int err;

        if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
                entry->counter = mlx5_tc_ct_counter_create(ct_priv);
        else
                entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);

        if (IS_ERR(entry->counter)) {
                err = PTR_ERR(entry->counter);
                return err;
        }

        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
                                        zone_restore_id);
        if (err)
                goto err_orig;

        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
                                        zone_restore_id);
        if (err)
                goto err_nat;

        return 0;

err_nat:
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
        mlx5_tc_ct_counter_put(ct_priv, entry);
        return err;
}

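/* FLOW_CLS_REPLACE handler: allocate a ct entry for the offloaded
 * conntrack connection, insert it into the tuple (and, when it NATs, the
 * NAT tuple) hashtables and install the HW rules for both directions.
 */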
static int
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
                                  struct flow_cls_offload *flow)
{
        struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
        struct flow_action_entry *meta_action;
        unsigned long cookie = flow->cookie;
        struct mlx5_ct_entry *entry;
        int err;

        meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
        if (!meta_action)
                return -EOPNOTSUPP;

        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
                                       cts_ht_params);
        if (entry)
                return 0;

        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
                return -ENOMEM;

        entry->tuple.zone = ft->zone;
        entry->cookie = flow->cookie;
        entry->restore_cookie = meta_action->ct_metadata.cookie;

        err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
        if (err)
                goto err_set;

        memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
        err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
        if (err)
                goto err_set;

        err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht,
                                     &entry->tuple_node,
                                     tuples_ht_params);
        if (err)
                goto err_tuple;

        if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
                err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht,
                                             &entry->tuple_nat_node,
                                             tuples_nat_ht_params);
                if (err)
                        goto err_tuple_nat;
        }

        err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
                                         ft->zone_restore_id);
        if (err)
                goto err_rules;

        err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node,
                                     cts_ht_params);
        if (err)
                goto err_insert;

        return 0;

err_insert:
        mlx5_tc_ct_entry_del_rules(ct_priv, entry);
err_rules:
        if (mlx5_tc_ct_entry_has_nat(entry))
                rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
                                       &entry->tuple_nat_node, tuples_nat_ht_params);
err_tuple_nat:
        rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
                               &entry->tuple_node,
                               tuples_ht_params);
err_tuple:
err_set:
        kfree(entry);
        netdev_warn(ct_priv->netdev,
                    "Failed to offload ct entry, err: %d\n", err);
        return err;
}

static void
mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
                        struct mlx5_ct_entry *entry)
{
        mlx5_tc_ct_entry_del_rules(ct_priv, entry);
        mutex_lock(&ct_priv->shared_counter_lock);
        if (mlx5_tc_ct_entry_has_nat(entry))
                rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
                                       &entry->tuple_nat_node,
                                       tuples_nat_ht_params);
        rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
                               tuples_ht_params);
        mutex_unlock(&ct_priv->shared_counter_lock);
        mlx5_tc_ct_counter_put(ct_priv, entry);
}

static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
                                  struct flow_cls_offload *flow)
{
        unsigned long cookie = flow->cookie;
        struct mlx5_ct_entry *entry;

        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
                                       cts_ht_params);
        if (!entry)
                return -ENOENT;

        mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry);
        WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht,
                                       &entry->node,
                                       cts_ht_params));
        kfree(entry);

        return 0;
}

static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
                                    struct flow_cls_offload *f)
{
        unsigned long cookie = f->cookie;
        struct mlx5_ct_entry *entry;
        u64 lastuse, packets, bytes;

        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
                                       cts_ht_params);
        if (!entry)
                return -ENOENT;

        mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
        flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
                          FLOW_ACTION_HW_STATS_DELAYED);

        return 0;
}

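/* Callback registered with the nf flowtable; dispatches add, del and
 * stats requests for offloaded conntrack entries of this zone's ft.
 */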
static int
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
                              void *cb_priv)
{
        struct flow_cls_offload *f = type_data;
        struct mlx5_ct_ft *ft = cb_priv;

        if (type != TC_SETUP_CLSFLOWER)
                return -EOPNOTSUPP;

        switch (f->command) {
        case FLOW_CLS_REPLACE:
                return mlx5_tc_ct_block_flow_offload_add(ft, f);
        case FLOW_CLS_DESTROY:
                return mlx5_tc_ct_block_flow_offload_del(ft, f);
        case FLOW_CLS_STATS:
                return mlx5_tc_ct_block_flow_offload_stats(ft, f);
        default:
                break;
        }

        return -EOPNOTSUPP;
}

static bool
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
                        u16 zone)
{
        struct flow_keys flow_keys;

        skb_reset_network_header(skb);
        skb_flow_dissect_flow_keys(skb, &flow_keys, 0);

        tuple->zone = zone;

        if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
            flow_keys.basic.ip_proto != IPPROTO_UDP)
                return false;

        tuple->port.src = flow_keys.ports.src;
        tuple->port.dst = flow_keys.ports.dst;
        tuple->n_proto = flow_keys.basic.n_proto;
        tuple->ip_proto = flow_keys.basic.ip_proto;

        switch (flow_keys.basic.n_proto) {
        case htons(ETH_P_IP):
                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
                tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
                tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
                break;

        case htons(ETH_P_IPV6):
                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
                tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
                tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
                break;
        default:
                goto out;
        }

        return true;

out:
        return false;
}

int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
{
        u32 ctstate = 0, ctstate_mask = 0;

        mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
                                        &ctstate, &ctstate_mask);
        if (ctstate_mask)
                return -EOPNOTSUPP;

        ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
                                    ctstate, ctstate_mask);

        return 0;
}

void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
{
        if (!priv || !ct_attr->ct_labels_id)
                return;

        mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id);
}

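/* Translate a tc ct_state/ct_zone/ct_mark/ct_labels match into matches on
 * the ct metadata registers. Only trk and est (set or negated) can be
 * offloaded; +new is rejected, and labels are matched through a mapped
 * 32-bit id.
 */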
int
mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
                     struct mlx5_flow_spec *spec,
                     struct flow_cls_offload *f,
                     struct mlx5_ct_attr *ct_attr,
                     struct netlink_ext_ack *extack)
{
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct flow_dissector_key_ct *mask, *key;
        bool trk, est, untrk, unest, new;
        u32 ctstate = 0, ctstate_mask = 0;
        u16 ct_state_on, ct_state_off;
        u16 ct_state, ct_state_mask;
        struct flow_match_ct match;
        u32 ct_labels[4];

        if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
                return 0;

        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct matching isn't available");
                return -EOPNOTSUPP;
        }

        flow_rule_match_ct(rule, &match);

        key = match.key;
        mask = match.mask;

        ct_state = key->ct_state;
        ct_state_mask = mask->ct_state;

        if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
                              TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
                              TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only ct_state trk, est and new are supported for offload");
                return -EOPNOTSUPP;
        }

        ct_state_on = ct_state & ct_state_mask;
        ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
        trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
        est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
        untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;

        ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
        ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
        ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
        ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;

        if (new) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "matching on ct_state +new isn't supported");
                return -EOPNOTSUPP;
        }

        if (mask->ct_zone)
                mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                            key->ct_zone, MLX5_CT_ZONE_MASK);
        if (ctstate_mask)
                mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
                                            ctstate, ctstate_mask);
        if (mask->ct_mark)
                mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
                                            key->ct_mark, mask->ct_mark);
        if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
            mask->ct_labels[3]) {
                ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
                ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
                ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
                ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
                if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
                        return -EOPNOTSUPP;
                mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
                                            MLX5_CT_LABELS_MASK);
        }

        return 0;
}

int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
                        struct mlx5_flow_attr *attr,
                        const struct flow_action_entry *act,
                        struct netlink_ext_ack *extack)
{
        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct action isn't available");
                return -EOPNOTSUPP;
        }

        attr->ct_attr.zone = act->ct.zone;
        attr->ct_attr.ct_action = act->ct.action;
        attr->ct_attr.nf_ft = act->ct.flow_table;

        return 0;
}

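/* Populate a pre_ct table with two rules: a flow rule that sends already
 * tracked (and, for pre_ct_nat, NATed) packets of this zone straight to
 * post_ct, and a catch-all miss rule that sends everything else to the
 * ct (or ct_nat) table. Both rules set the zone register.
 */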
static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
                                  struct mlx5_tc_ct_pre *pre_ct,
                                  bool nat)
{
        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
        struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
        struct mlx5_core_dev *dev = ct_priv->dev;
        struct mlx5_flow_table *ft = pre_ct->ft;
        struct mlx5_flow_destination dest = {};
        struct mlx5_flow_act flow_act = {};
        struct mlx5_modify_hdr *mod_hdr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
        u32 ctstate;
        u16 zone;
        int err;

        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec)
                return -ENOMEM;

        zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
        err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
                                        ZONE_TO_REG, zone);
        if (err) {
                ct_dbg("Failed to set zone register mapping");
                goto err_mapping;
        }

        mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
                                           pre_mod_acts.num_actions,
                                           pre_mod_acts.actions);

        if (IS_ERR(mod_hdr)) {
                err = PTR_ERR(mod_hdr);
                ct_dbg("Failed to create pre ct mod hdr");
                goto err_mapping;
        }
        pre_ct->modify_hdr = mod_hdr;

        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                          MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
        flow_act.modify_hdr = mod_hdr;
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

        /* add flow rule */
        mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                    zone, MLX5_CT_ZONE_MASK);
        ctstate = MLX5_CT_STATE_TRK_BIT;
        if (nat)
                ctstate |= MLX5_CT_STATE_NAT_BIT;
        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);

        dest.ft = ct_priv->post_ct;
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                ct_dbg("Failed to add pre ct flow rule zone %d", zone);
                goto err_flow_rule;
        }
        pre_ct->flow_rule = rule;

        /* add miss rule */
        memset(spec, 0, sizeof(*spec));
        dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                ct_dbg("Failed to add pre ct miss rule zone %d", zone);
                goto err_miss_rule;
        }
        pre_ct->miss_rule = rule;

        dealloc_mod_hdr_actions(&pre_mod_acts);
        kvfree(spec);
        return 0;

err_miss_rule:
        mlx5_del_flow_rules(pre_ct->flow_rule);
err_flow_rule:
        mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
        dealloc_mod_hdr_actions(&pre_mod_acts);
        kvfree(spec);
        return err;
}

static void
tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
                       struct mlx5_tc_ct_pre *pre_ct)
{
        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
        struct mlx5_core_dev *dev = ct_priv->dev;

        mlx5_del_flow_rules(pre_ct->flow_rule);
        mlx5_del_flow_rules(pre_ct->miss_rule);
        mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
}

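/* Create a two-entry pre_ct flow table for a zone: group 0 matches the ct
 * state and zone in metadata reg_c_2 for the flow rule, group 1 holds the
 * catch-all miss rule.
 */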
static int
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
                        struct mlx5_tc_ct_pre *pre_ct,
                        bool nat)
{
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
        struct mlx5_core_dev *dev = ct_priv->dev;
        struct mlx5_flow_table_attr ft_attr = {};
        struct mlx5_flow_namespace *ns;
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *g;
        u32 metadata_reg_c_2_mask;
        u32 *flow_group_in;
        void *misc;
        int err;

        ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
        if (!ns) {
                err = -EOPNOTSUPP;
                ct_dbg("Failed to get flow namespace");
                return err;
        }

        flow_group_in = kvzalloc(inlen, GFP_KERNEL);
        if (!flow_group_in)
                return -ENOMEM;

        ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
        ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
                       FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
        ft_attr.max_fte = 2;
        ft_attr.level = 1;
        ft = mlx5_create_flow_table(ns, &ft_attr);
        if (IS_ERR(ft)) {
                err = PTR_ERR(ft);
                ct_dbg("Failed to create pre ct table");
                goto out_free;
        }
        pre_ct->ft = ft;

        /* create flow group */
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
                 MLX5_MATCH_MISC_PARAMETERS_2);

        misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
                            match_criteria.misc_parameters_2);

        metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
        metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
        if (nat)
                metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);

        MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
                 metadata_reg_c_2_mask);

        g = mlx5_create_flow_group(ft, flow_group_in);
        if (IS_ERR(g)) {
                err = PTR_ERR(g);
                ct_dbg("Failed to create pre ct group");
                goto err_flow_grp;
        }
        pre_ct->flow_grp = g;

        /* create miss group */
        memset(flow_group_in, 0, inlen);
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
        g = mlx5_create_flow_group(ft, flow_group_in);
        if (IS_ERR(g)) {
                err = PTR_ERR(g);
                ct_dbg("Failed to create pre ct miss group");
                goto err_miss_grp;
        }
        pre_ct->miss_grp = g;

        err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
        if (err)
                goto err_add_rules;

        kvfree(flow_group_in);
        return 0;

err_add_rules:
        mlx5_destroy_flow_group(pre_ct->miss_grp);
err_miss_grp:
        mlx5_destroy_flow_group(pre_ct->flow_grp);
err_flow_grp:
        mlx5_destroy_flow_table(ft);
out_free:
        kvfree(flow_group_in);
        return err;
}

static void
mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
                       struct mlx5_tc_ct_pre *pre_ct)
{
        tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
        mlx5_destroy_flow_group(pre_ct->miss_grp);
        mlx5_destroy_flow_group(pre_ct->flow_grp);
        mlx5_destroy_flow_table(pre_ct->ft);
}

static int
mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
{
        int err;

        err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
        if (err)
                return err;

        err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
        if (err)
                goto err_pre_ct_nat;

        return 0;

err_pre_ct_nat:
        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
        return err;
}

static void
mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
{
        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
}

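/* Get (or create) the per-zone ft: map the zone to a restore id, build
 * the pre_ct/pre_ct_nat tables and register the flowtable offload
 * callback. Instances are refcounted per zone.
 */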
1419 static struct mlx5_ct_ft *
1420 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1421                      struct nf_flowtable *nf_ft)
1422 {
1423         struct mlx5_ct_ft *ft;
1424         int err;
1425
1426         ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1427         if (ft) {
1428                 refcount_inc(&ft->refcount);
1429                 return ft;
1430         }
1431
1432         ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1433         if (!ft)
1434                 return ERR_PTR(-ENOMEM);
1435
1436         err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1437         if (err)
1438                 goto err_mapping;
1439
1440         ft->zone = zone;
1441         ft->nf_ft = nf_ft;
1442         ft->ct_priv = ct_priv;
1443         refcount_set(&ft->refcount, 1);
1444
1445         err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1446         if (err)
1447                 goto err_alloc_pre_ct;
1448
1449         err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1450         if (err)
1451                 goto err_init;
1452
1453         err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1454                                      zone_params);
1455         if (err)
1456                 goto err_insert;
1457
1458         err = nf_flow_table_offload_add_cb(ft->nf_ft,
1459                                            mlx5_tc_ct_block_flow_offload, ft);
1460         if (err)
1461                 goto err_add_cb;
1462
1463         return ft;
1464
1465 err_add_cb:
1466         rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1467 err_insert:
1468         rhashtable_destroy(&ft->ct_entries_ht);
1469 err_init:
1470         mlx5_tc_ct_free_pre_ct_tables(ft);
1471 err_alloc_pre_ct:
1472         mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1473 err_mapping:
1474         kfree(ft);
1475         return ERR_PTR(err);
1476 }
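
/* Zone tables are shared between filters: a second filter in the same zone
 * only takes a reference on the existing mlx5_ct_ft. The nf flowtable
 * offload callback is registered once per zone and is what populates
 * ct_entries_ht with offloaded connections.
 */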

static void
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
{
        struct mlx5_tc_ct_priv *ct_priv = arg;
        struct mlx5_ct_entry *entry = ptr;

        mlx5_tc_ct_del_ft_entry(ct_priv, entry);
        kfree(entry);
}

static void
mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
{
        if (!refcount_dec_and_test(&ft->refcount))
                return;

        nf_flow_table_offload_del_cb(ft->nf_ft,
                                     mlx5_tc_ct_block_flow_offload, ft);
        rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
        rhashtable_free_and_destroy(&ft->ct_entries_ht,
                                    mlx5_tc_ct_flush_ft_entry,
                                    ct_priv);
        mlx5_tc_ct_free_pre_ct_tables(ft);
        mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
        kfree(ft);
}
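
/* rhashtable_free_and_destroy() runs mlx5_tc_ct_flush_ft_entry() on every
 * entry still in ct_entries_ht, so connections that nf flowtable hasn't
 * torn down yet are released here as well.
 */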

/* We translate the tc filter with CT action to the following HW model:
 *
 * +---------------------+
 * + ft prio (tc chain)  +
 * + original match      +
 * +---------------------+
 *      | set chain miss mapping
 *      | set fte_id
 *      | set tunnel_id
 *      | do decap
 *      v
 * +---------------------+
 * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
 * + zone+nat match      +---------------->+ post_ct (see below) +
 * +---------------------+  set zone       +---------------------+
 *      | set zone
 *      v
 * +--------------------+
 * + CT (nat or no nat) +
 * + tuple + zone match +
 * +--------------------+
 *      | set mark
 *      | set labels_id
 *      | set established
 *      | set zone_restore
 *      | do nat (if needed)
 *      v
 * +--------------+
 * + post_ct      + original filter actions
 * + fte_id match +------------------------>
 * +--------------+
 */
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
                          struct mlx5e_tc_flow *flow,
                          struct mlx5_flow_spec *orig_spec,
                          struct mlx5_flow_attr *attr)
{
        bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
        struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
        u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
        struct mlx5_flow_spec *post_ct_spec = NULL;
        struct mlx5_flow_attr *pre_ct_attr;
        struct mlx5_modify_hdr *mod_hdr;
        struct mlx5_flow_handle *rule;
        struct mlx5_ct_flow *ct_flow;
        int chain_mapping = 0, err;
        struct mlx5_ct_ft *ft;
        u32 fte_id = 1;

        post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
        ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
        if (!post_ct_spec || !ct_flow) {
                kfree(post_ct_spec);
                kfree(ct_flow);
                return ERR_PTR(-ENOMEM);
        }

        /* Register for CT established events */
        ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
                                  attr->ct_attr.nf_ft);
        if (IS_ERR(ft)) {
                err = PTR_ERR(ft);
                ct_dbg("Failed to register to ft callback");
                goto err_ft;
        }
        ct_flow->ft = ft;

        err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
                            MLX5_FTE_ID_MAX, GFP_KERNEL);
        if (err) {
                netdev_warn(priv->netdev,
                            "Failed to allocate fte id, err: %d\n", err);
                goto err_idr;
        }
        ct_flow->fte_id = fte_id;

        /* Base the flow attributes of both rules on the original rule's
         * attribute.
         */
        ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
        if (!ct_flow->pre_ct_attr) {
                err = -ENOMEM;
                goto err_alloc_pre;
        }

        ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
        if (!ct_flow->post_ct_attr) {
                err = -ENOMEM;
                goto err_alloc_post;
        }

        pre_ct_attr = ct_flow->pre_ct_attr;
        memcpy(pre_ct_attr, attr, attr_sz);
        memcpy(ct_flow->post_ct_attr, attr, attr_sz);

        /* Modify the original rule's action to fwd and modify, keep decap */
        pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
        pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                               MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

        /* Write the chain miss tag for misses in the ct table, since we
         * don't go through all prios of this chain as normal tc rules do
         * on miss.
         */
        err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
                                            &chain_mapping);
        if (err) {
                ct_dbg("Failed to get chain register mapping for chain");
                goto err_get_chain;
        }
        ct_flow->chain_mapping = chain_mapping;

        err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
                                        CHAIN_TO_REG, chain_mapping);
        if (err) {
                ct_dbg("Failed to set chain register mapping");
                goto err_mapping;
        }

        err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
                                        FTEID_TO_REG, fte_id);
        if (err) {
                ct_dbg("Failed to set fte_id register mapping");
                goto err_mapping;
        }

        /* If the original flow does decap, it happens before entering the
         * ct table, so add a rewrite for the tunnel match_id.
         */
        if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
            attr->chain == 0) {
                u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);

                err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
                                                ct_priv->ns_type,
                                                TUNNEL_TO_REG,
                                                tun_id);
                if (err) {
                        ct_dbg("Failed to set tunnel register mapping");
                        goto err_mapping;
                }
        }

        mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
                                           pre_mod_acts.num_actions,
                                           pre_mod_acts.actions);
        if (IS_ERR(mod_hdr)) {
                err = PTR_ERR(mod_hdr);
                ct_dbg("Failed to create pre ct mod hdr");
                goto err_mapping;
        }
        pre_ct_attr->modify_hdr = mod_hdr;

        /* The post ct rule matches on fte_id and executes the original
         * rule's tc actions.
         */
        mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
                                    fte_id, MLX5_FTE_ID_MASK);

        /* Put the post_ct rule on the post_ct flow table */
        ct_flow->post_ct_attr->chain = 0;
        ct_flow->post_ct_attr->prio = 0;
        ct_flow->post_ct_attr->ft = ct_priv->post_ct;

        ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
        ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
        ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
        rule = mlx5_tc_rule_insert(priv, post_ct_spec,
                                   ct_flow->post_ct_attr);
        ct_flow->post_ct_rule = rule;
        if (IS_ERR(ct_flow->post_ct_rule)) {
                err = PTR_ERR(ct_flow->post_ct_rule);
                ct_dbg("Failed to add post ct rule");
                goto err_insert_post_ct;
        }

        /* Point the original rule at the ct table */
        pre_ct_attr->dest_chain = 0;
        pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
        ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
                                                   pre_ct_attr);
        if (IS_ERR(ct_flow->pre_ct_rule)) {
                err = PTR_ERR(ct_flow->pre_ct_rule);
                ct_dbg("Failed to add pre ct rule");
                goto err_insert_orig;
        }

        attr->ct_attr.ct_flow = ct_flow;
        dealloc_mod_hdr_actions(&pre_mod_acts);
        kfree(post_ct_spec);

        return rule;

err_insert_orig:
        mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
                            ct_flow->post_ct_attr);
err_insert_post_ct:
        mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
        dealloc_mod_hdr_actions(&pre_mod_acts);
        mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
err_get_chain:
        kfree(ct_flow->post_ct_attr);
err_alloc_post:
        kfree(ct_flow->pre_ct_attr);
err_alloc_pre:
        idr_remove(&ct_priv->fte_ids, fte_id);
err_idr:
        mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
        kfree(post_ct_spec);
        kfree(ct_flow);
        netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
        return ERR_PTR(err);
}
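
/* At this point the original filter has effectively been split in two:
 *
 *      pre_ct rule:  original match -> set CHAIN/FTEID (and TUNNEL)
 *                    registers, fwd to pre_ct or pre_ct_nat
 *      post_ct rule: FTEID register == fte_id -> original actions
 *
 * with the CT tables in between doing the per-tuple matching.
 */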

static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
                                struct mlx5_flow_spec *orig_spec,
                                struct mlx5_flow_attr *attr,
                                struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
        u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
        struct mlx5_flow_attr *pre_ct_attr;
        struct mlx5_modify_hdr *mod_hdr;
        struct mlx5_flow_handle *rule;
        struct mlx5_ct_flow *ct_flow;
        int err;

        ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
        if (!ct_flow)
                return ERR_PTR(-ENOMEM);

        /* Base esw attributes on the original rule's attribute */
        pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
        if (!pre_ct_attr) {
                err = -ENOMEM;
                goto err_attr;
        }

        memcpy(pre_ct_attr, attr, attr_sz);

        err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
        if (err) {
                ct_dbg("Failed to set registers for ct clear");
                goto err_set_registers;
        }

        mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
                                           mod_acts->num_actions,
                                           mod_acts->actions);
        if (IS_ERR(mod_hdr)) {
                err = PTR_ERR(mod_hdr);
                ct_dbg("Failed to create ct clear mod hdr");
                goto err_set_registers;
        }

        dealloc_mod_hdr_actions(mod_acts);
        pre_ct_attr->modify_hdr = mod_hdr;
        pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

        rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                ct_dbg("Failed to add ct clear rule");
                goto err_insert;
        }

        attr->ct_attr.ct_flow = ct_flow;
        ct_flow->pre_ct_attr = pre_ct_attr;
        ct_flow->pre_ct_rule = rule;
        return rule;

err_insert:
        mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
err_set_registers:
        netdev_warn(priv->netdev,
                    "Failed to offload ct clear flow, err %d\n", err);
        kfree(pre_ct_attr);
err_attr:
        kfree(ct_flow);

        return ERR_PTR(err);
}
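
/* A ct clear action doesn't touch the CT tables at all: the original rule
 * is kept in place and only gains a modify-header that zeroes the CT state
 * registers (the mlx5_tc_ct_entry_set_registers(..., 0, 0, 0, 0) call
 * above).
 */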

struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
                        struct mlx5e_tc_flow *flow,
                        struct mlx5_flow_spec *spec,
                        struct mlx5_flow_attr *attr,
                        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
        bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
        struct mlx5_flow_handle *rule;

        if (!priv)
                return ERR_PTR(-EOPNOTSUPP);

        mutex_lock(&priv->control_lock);

        if (clear_action)
                rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
        else
                rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
        mutex_unlock(&priv->control_lock);

        return rule;
}
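
/* Sketch of the expected call site in the tc offload path (illustrative
 * only; the real caller lives in en_tc.c):
 *
 *      if (flow_flag_test(flow, CT))
 *              rule = mlx5_tc_ct_flow_offload(ct_priv, flow, spec, attr,
 *                                             mod_hdr_acts);
 *
 * A NULL ct_priv (CT offload unsupported) yields -EOPNOTSUPP.
 */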

static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
                         struct mlx5e_tc_flow *flow,
                         struct mlx5_ct_flow *ct_flow)
{
        struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);

        mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
                            pre_ct_attr);
        mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);

        if (ct_flow->post_ct_rule) {
                mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
                                    ct_flow->post_ct_attr);
                mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
                idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
                mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
        }

        kfree(ct_flow->pre_ct_attr);
        kfree(ct_flow->post_ct_attr);
        kfree(ct_flow);
}
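
/* Note: ct clear flows never set post_ct_rule, so the post_ct/fte_id/ft
 * teardown above is skipped for them.
 */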

void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
                       struct mlx5e_tc_flow *flow,
                       struct mlx5_flow_attr *attr)
{
        struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;

        /* We may also be called on a parsing error, before any rules were
         * offloaded; in that case there is nothing to delete.
         */
        if (!ct_flow)
                return;

        mutex_lock(&priv->control_lock);
        __mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
        mutex_unlock(&priv->control_lock);
}

static int
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
                                  const char **err_msg)
{
        if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
                *err_msg = "firmware level support is missing";
                return -EOPNOTSUPP;
        }

        if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
                /* The vlan workaround should be avoided for multi chain
                 * rules. This is just a sanity check, as the pop vlan
                 * action should be supported by any FW that supports
                 * ignore_flow_level.
                 */
                *err_msg = "firmware vlan actions support is missing";
                return -EOPNOTSUPP;
        }

        if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
                                    fdb_modify_header_fwd_to_table)) {
                /* CT always writes to registers, which are mod header
                 * actions. Therefore, both mod header and goto are
                 * required.
                 */
                *err_msg = "firmware fwd and modify support is missing";
                return -EOPNOTSUPP;
        }

        if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
                *err_msg = "register loopback isn't supported";
                return -EOPNOTSUPP;
        }

        return 0;
}

static int
mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
                                  const char **err_msg)
{
        if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
                *err_msg = "firmware level support is missing";
                return -EOPNOTSUPP;
        }

        return 0;
}

static int
mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
                              enum mlx5_flow_namespace_type ns_type,
                              const char **err_msg)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
        /* The chain ID cannot be restored on a HW miss without the tc skb
         * extension.
         */
        *err_msg = "tc skb extension missing";
        return -EOPNOTSUPP;
#endif
        if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
                return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
        else
                return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
}

#define INIT_ERR_PREFIX "tc ct offload init failed"

struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
                struct mod_hdr_tbl *mod_hdr,
                enum mlx5_flow_namespace_type ns_type)
{
        struct mlx5_tc_ct_priv *ct_priv;
        struct mlx5_core_dev *dev;
        const char *msg;
        int err;

        dev = priv->mdev;
        err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
        if (err) {
                mlx5_core_warn(dev,
                               "tc ct offload not supported, %s\n",
                               msg);
                goto err_support;
        }

        ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
        if (!ct_priv)
                goto err_alloc;

        ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
        if (IS_ERR(ct_priv->zone_mapping)) {
                err = PTR_ERR(ct_priv->zone_mapping);
                goto err_mapping_zone;
        }

        ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
        if (IS_ERR(ct_priv->labels_mapping)) {
                err = PTR_ERR(ct_priv->labels_mapping);
                goto err_mapping_labels;
        }

        ct_priv->ns_type = ns_type;
        ct_priv->chains = chains;
        ct_priv->netdev = priv->netdev;
        ct_priv->dev = priv->mdev;
        ct_priv->mod_hdr_tbl = mod_hdr;
        ct_priv->ct = mlx5_chains_create_global_table(chains);
        if (IS_ERR(ct_priv->ct)) {
                err = PTR_ERR(ct_priv->ct);
                mlx5_core_warn(dev,
                               "%s, failed to create ct table err: %d\n",
                               INIT_ERR_PREFIX, err);
                goto err_ct_tbl;
        }

        ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
        if (IS_ERR(ct_priv->ct_nat)) {
                err = PTR_ERR(ct_priv->ct_nat);
                mlx5_core_warn(dev,
                               "%s, failed to create ct nat table err: %d\n",
                               INIT_ERR_PREFIX, err);
                goto err_ct_nat_tbl;
        }

        ct_priv->post_ct = mlx5_chains_create_global_table(chains);
        if (IS_ERR(ct_priv->post_ct)) {
                err = PTR_ERR(ct_priv->post_ct);
                mlx5_core_warn(dev,
                               "%s, failed to create post ct table err: %d\n",
                               INIT_ERR_PREFIX, err);
                goto err_post_ct_tbl;
        }

        idr_init(&ct_priv->fte_ids);
        mutex_init(&ct_priv->control_lock);
        mutex_init(&ct_priv->shared_counter_lock);
        rhashtable_init(&ct_priv->zone_ht, &zone_params);
        rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
        rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);

        return ct_priv;

err_post_ct_tbl:
        mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
err_ct_nat_tbl:
        mlx5_chains_destroy_global_table(chains, ct_priv->ct);
err_ct_tbl:
        mapping_destroy(ct_priv->labels_mapping);
err_mapping_labels:
        mapping_destroy(ct_priv->zone_mapping);
err_mapping_zone:
        kfree(ct_priv);
err_alloc:
err_support:

        return NULL;
}
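
/* Init failures deliberately return NULL rather than an ERR_PTR: callers
 * treat a NULL ct_priv as "CT offload unavailable" and carry on without
 * it, as the !priv check in mlx5_tc_ct_flow_offload() above shows.
 */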

void
mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
{
        struct mlx5_fs_chains *chains;

        if (!ct_priv)
                return;

        chains = ct_priv->chains;

        mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
        mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
        mlx5_chains_destroy_global_table(chains, ct_priv->ct);
        mapping_destroy(ct_priv->zone_mapping);
        mapping_destroy(ct_priv->labels_mapping);

        rhashtable_destroy(&ct_priv->ct_tuples_ht);
        rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
        rhashtable_destroy(&ct_priv->zone_ht);
        mutex_destroy(&ct_priv->control_lock);
        mutex_destroy(&ct_priv->shared_counter_lock);
        idr_destroy(&ct_priv->fte_ids);
        kfree(ct_priv);
}

bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
                         struct sk_buff *skb, u8 zone_restore_id)
{
        struct mlx5_ct_tuple tuple = {};
        struct mlx5_ct_entry *entry;
        u16 zone;

        if (!ct_priv || !zone_restore_id)
                return true;

        if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
                return false;

        if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
                return false;

        entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple,
                                       tuples_ht_params);
        if (!entry)
                entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
                                               &tuple, tuples_nat_ht_params);
        if (!entry)
                return false;

        tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
        return true;
}
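
/* Called on the RX path after a HW miss: the zone_restore_id read from the
 * zone restore register is mapped back to a zone, the packet is re-parsed
 * into a tuple, and on a hit the saved conntrack cookie is restored onto
 * the skb so software conntrack sees the connection as already tracked.
 */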