/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <net/switchdev.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/vxlan.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
#include "vxlan.h"

struct mlx5_nic_flow_attr {
        u32 action;
        u32 flow_tag;
};

enum {
        MLX5E_TC_FLOW_ESWITCH   = BIT(0),
        MLX5E_TC_FLOW_NIC       = BIT(1),
};

struct mlx5e_tc_flow {
        struct rhash_head       node;
        u64                     cookie;
        u8                      flags;
        struct mlx5_flow_handle *rule;
        struct list_head        encap; /* flows sharing the same encap */
        union {
                struct mlx5_esw_flow_attr esw_attr[0];
                struct mlx5_nic_flow_attr nic_attr[0];
        };
};

struct mlx5e_tc_flow_parse_attr {
        struct mlx5_flow_spec spec;
        int num_mod_hdr_actions;
        void *mod_hdr_actions;
};

enum {
        MLX5_HEADER_TYPE_VXLAN = 0x0,
        MLX5_HEADER_TYPE_NVGRE = 0x1,
};

#define MLX5E_TC_TABLE_NUM_ENTRIES 1024
#define MLX5E_TC_TABLE_NUM_GROUPS 4

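/* Add a flow rule to the NIC (non-eswitch) offload table. The TC flow
 * table is created lazily on first use as an auto-grouped table
 * (MLX5E_TC_TABLE_NUM_ENTRIES entries, MLX5E_TC_TABLE_NUM_GROUPS groups)
 * and torn down again if adding the rule fails. A FWD_DEST action steers
 * matching packets back to the vlan flow table; otherwise a COUNT action
 * attaches a dedicated flow counter as the destination.
 *
 * Illustrative example of a filter that ends up here (device name and
 * match are assumptions; skip_sw is optional):
 *   tc filter add dev eth0 parent ffff: protocol ip flower skip_sw \
 *      ip_proto tcp dst_port 80 action drop
 */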
static struct mlx5_flow_handle *
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow_parse_attr *parse_attr,
                      struct mlx5e_tc_flow *flow)
{
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        struct mlx5_core_dev *dev = priv->mdev;
        struct mlx5_flow_destination dest = {};
        struct mlx5_flow_act flow_act = {
                .action = attr->action,
                .flow_tag = attr->flow_tag,
                .encap_id = 0,
        };
        struct mlx5_fc *counter = NULL;
        struct mlx5_flow_handle *rule;
        bool table_created = false;

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest.ft = priv->fs.vlan.ft.t;
        } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(dev, true);
                if (IS_ERR(counter))
                        return ERR_CAST(counter);

                dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest.counter = counter;
        }

        if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
                priv->fs.tc.t =
                        mlx5_create_auto_grouped_flow_table(priv->fs.ns,
                                                            MLX5E_TC_PRIO,
                                                            MLX5E_TC_TABLE_NUM_ENTRIES,
                                                            MLX5E_TC_TABLE_NUM_GROUPS,
                                                            0, 0);
                if (IS_ERR(priv->fs.tc.t)) {
                        netdev_err(priv->netdev,
                                   "Failed to create tc offload table\n");
                        rule = ERR_CAST(priv->fs.tc.t);
                        goto err_create_ft;
                }

                table_created = true;
        }

        parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
        rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
                                   &flow_act, &dest, 1);

        if (IS_ERR(rule))
                goto err_add_rule;

        return rule;

err_add_rule:
        if (table_created) {
                mlx5_destroy_flow_table(priv->fs.tc.t);
                priv->fs.tc.t = NULL;
        }
err_create_ft:
        mlx5_fc_destroy(dev, counter);

        return rule;
}

static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_fc *counter = NULL;

        counter = mlx5_flow_rule_counter(flow->rule);
        mlx5_del_flow_rules(flow->rule);
        mlx5_fc_destroy(priv->mdev, counter);

        if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
                mlx5_destroy_flow_table(priv->fs.tc.t);
                priv->fs.tc.t = NULL;
        }
}

static void mlx5e_detach_encap(struct mlx5e_priv *priv,
                               struct mlx5e_tc_flow *flow);

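/* Add an offloaded rule to the eswitch FDB. The vlan push/pop action is
 * programmed first so mlx5_eswitch_add_offloaded_rule() sees a fully
 * initialized attr; on failure the vlan action, and any encap entry that
 * was attached while parsing, is rolled back.
 */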
static struct mlx5_flow_handle *
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow_parse_attr *parse_attr,
                      struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        struct mlx5_flow_handle *rule;
        int err;

        err = mlx5_eswitch_add_vlan_action(esw, attr);
        if (err) {
                rule = ERR_PTR(err);
                goto err_add_vlan;
        }

        rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
        if (IS_ERR(rule))
                goto err_add_rule;

        return rule;

err_add_rule:
        mlx5_eswitch_del_vlan_action(esw, attr);
err_add_vlan:
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
                mlx5e_detach_encap(priv, flow);

        return rule;
}

static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr);

        mlx5_eswitch_del_vlan_action(esw, flow->esw_attr);

        if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
                mlx5e_detach_encap(priv, flow);
}

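/* Flows sharing an encap entry are linked on the entry's flows list, so
 * the list doubles as a reference count. When the last flow is unlinked,
 * the HW encap context is deallocated, the cached neighbour is released
 * and the entry is removed from the encap hash table.
 */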
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
                               struct mlx5e_tc_flow *flow)
{
        struct list_head *next = flow->encap.next;

        list_del(&flow->encap);
        if (list_empty(next)) {
                struct mlx5_encap_entry *e;

                e = list_entry(next, struct mlx5_encap_entry, flows);
                if (e->n) {
                        mlx5_encap_dealloc(priv->mdev, e->encap_id);
                        neigh_release(e->n);
                }
                hlist_del_rcu(&e->encap_hlist);
                kfree(e);
        }
}

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
{
        if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
                mlx5e_tc_del_fdb_flow(priv, flow);
        else
                mlx5e_tc_del_nic_flow(priv, flow);
}

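/* Fill the VXLAN-specific match fields: the outer ip_protocol must be
 * UDP, and when the filter matches on the tunnel key id, it is translated
 * into a match on the vxlan_vni misc parameter.
 */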
static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
                             struct tc_cls_flower_offload *f)
{
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);
        void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                    misc_parameters);
        void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                    misc_parameters);

        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
        MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
                struct flow_dissector_key_keyid *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_KEYID,
                                                  f->key);
                struct flow_dissector_key_keyid *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_KEYID,
                                                  f->mask);
                MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
                         be32_to_cpu(mask->keyid));
                MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
                         be32_to_cpu(key->keyid));
        }
}

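/* Parse the tunnel (decap) part of a flower match into the outer header
 * match fields. Offload requires an exact-match UDP destination port that
 * is a VXLAN port known to the driver; outer IPv4/IPv6 addresses come
 * from the enc keys. The DMAC match is always enforced (flow counters
 * require it) and IP fragments are excluded so software handles them.
 */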
static int parse_tunnel_attr(struct mlx5e_priv *priv,
                             struct mlx5_flow_spec *spec,
                             struct tc_cls_flower_offload *f)
{
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);

        struct flow_dissector_key_control *enc_control =
                skb_flow_dissector_target(f->dissector,
                                          FLOW_DISSECTOR_KEY_ENC_CONTROL,
                                          f->key);

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
                struct flow_dissector_key_ports *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_PORTS,
                                                  f->key);
                struct flow_dissector_key_ports *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_PORTS,
                                                  f->mask);
                struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
                struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
                struct mlx5e_priv *up_priv = netdev_priv(up_dev);

                /* Full udp dst port must be given */
                if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
                        goto vxlan_match_offload_err;

                if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->dst)) &&
                    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
                        parse_vxlan_attr(spec, f);
                else {
                        netdev_warn(priv->netdev,
                                    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
                        return -EOPNOTSUPP;
                }

                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                         udp_dport, ntohs(mask->dst));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                         udp_dport, ntohs(key->dst));

                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                         udp_sport, ntohs(mask->src));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                         udp_sport, ntohs(key->src));
        } else { /* udp dst port must be given */
vxlan_match_offload_err:
                netdev_warn(priv->netdev,
                            "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
                return -EOPNOTSUPP;
        }

        if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_dissector_key_ipv4_addrs *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
                                                  f->key);
                struct flow_dissector_key_ipv4_addrs *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
                                                  f->mask);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                         src_ipv4_src_ipv6.ipv4_layout.ipv4,
                         ntohl(mask->src));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                         src_ipv4_src_ipv6.ipv4_layout.ipv4,
                         ntohl(key->src));

                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                         dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
                         ntohl(mask->dst));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                         dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
                         ntohl(key->dst));

                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
        } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_dissector_key_ipv6_addrs *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
                                                  f->key);
                struct flow_dissector_key_ipv6_addrs *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
                                                  f->mask);

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));

                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
        }

        /* Enforce DMAC when offloading incoming tunneled flows.
         * Flow counters require a match on the DMAC.
         */
        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                     dmac_47_16), priv->netdev->dev_addr);

        /* let software handle IP fragments */
        MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
        MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);

        return 0;
}

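/* Translate a flower classifier into mlx5 match criteria and values.
 * Rejects dissector keys the HW cannot match on, redirects the header
 * pointers to inner_headers for decap flows, and reports via min_inline
 * the minimal TX inline mode (L2/IP/TCP_UDP) the match requires.
 */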
static int __parse_cls_flower(struct mlx5e_priv *priv,
                              struct mlx5_flow_spec *spec,
                              struct tc_cls_flower_offload *f,
                              u8 *min_inline)
{
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);
        u16 addr_type = 0;
        u8 ip_proto = 0;

        *min_inline = MLX5_INLINE_MODE_L2;

        if (f->dissector->used_keys &
            ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
              BIT(FLOW_DISSECTOR_KEY_BASIC) |
              BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_VLAN) |
              BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_PORTS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
              BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL))) {
                netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
                            f->dissector->used_keys);
                return -EOPNOTSUPP;
        }

        if ((dissector_uses_key(f->dissector,
                                FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
             dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
             dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
            dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
                struct flow_dissector_key_control *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_CONTROL,
                                                  f->key);
                switch (key->addr_type) {
                case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
                case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
                        if (parse_tunnel_attr(priv, spec, f))
                                return -EOPNOTSUPP;
                        break;
                default:
                        return -EOPNOTSUPP;
                }

                /* In decap flow, header pointers should point to the inner
                 * headers; the outer headers were already set by
                 * parse_tunnel_attr().
                 */
                headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                         inner_headers);
                headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                         inner_headers);
        }

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_dissector_key_control *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_CONTROL,
                                                  f->key);

                struct flow_dissector_key_control *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_CONTROL,
                                                  f->mask);
                addr_type = key->addr_type;

                if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
                                 key->flags & FLOW_DIS_IS_FRAGMENT);

                        /* the HW doesn't need L3 inline to match on frag=no */
                        if (key->flags & FLOW_DIS_IS_FRAGMENT)
                                *min_inline = MLX5_INLINE_MODE_IP;
                }
        }

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
                struct flow_dissector_key_basic *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_BASIC,
                                                  f->key);
                struct flow_dissector_key_basic *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_BASIC,
                                                  f->mask);
                ip_proto = key->ip_proto;

                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
                         ntohs(mask->n_proto));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
                         ntohs(key->n_proto));

                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                         mask->ip_proto);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
                         key->ip_proto);

                if (mask->ip_proto)
                        *min_inline = MLX5_INLINE_MODE_IP;
        }

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
                struct flow_dissector_key_eth_addrs *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ETH_ADDRS,
                                                  f->key);
                struct flow_dissector_key_eth_addrs *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ETH_ADDRS,
                                                  f->mask);

                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                             dmac_47_16),
                                mask->dst);
                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                             dmac_47_16),
                                key->dst);

                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                             smac_47_16),
                                mask->src);
                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                             smac_47_16),
                                key->src);
        }

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
                struct flow_dissector_key_vlan *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_VLAN,
                                                  f->key);
                struct flow_dissector_key_vlan *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_VLAN,
                                                  f->mask);
                if (mask->vlan_id || mask->vlan_priority) {
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
                }
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_dissector_key_ipv4_addrs *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
                                                  f->key);
                struct flow_dissector_key_ipv4_addrs *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
                                                  f->mask);

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &mask->src, sizeof(mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &key->src, sizeof(key->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &mask->dst, sizeof(mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &key->dst, sizeof(key->dst));

                if (mask->src || mask->dst)
                        *min_inline = MLX5_INLINE_MODE_IP;
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_dissector_key_ipv6_addrs *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
                                                  f->key);
                struct flow_dissector_key_ipv6_addrs *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
                                                  f->mask);

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &mask->src, sizeof(mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &key->src, sizeof(key->src));

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &mask->dst, sizeof(mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &key->dst, sizeof(key->dst));

                if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
                    ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
                        *min_inline = MLX5_INLINE_MODE_IP;
        }

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_dissector_key_ports *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_PORTS,
                                                  f->key);
                struct flow_dissector_key_ports *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_PORTS,
                                                  f->mask);
                switch (ip_proto) {
                case IPPROTO_TCP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_sport, ntohs(mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_sport, ntohs(key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_dport, ntohs(mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_dport, ntohs(key->dst));
                        break;

                case IPPROTO_UDP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_sport, ntohs(mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_sport, ntohs(key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_dport, ntohs(mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_dport, ntohs(key->dst));
                        break;
                default:
                        netdev_err(priv->netdev,
                                   "Only UDP and TCP transport are supported\n");
                        return -EINVAL;
                }

                if (mask->src || mask->dst)
                        *min_inline = MLX5_INLINE_MODE_TCP_UDP;
        }

        return 0;
}

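/* Wrapper around __parse_cls_flower() that, for eswitch flows on VF
 * representors (not the uplink), verifies the minimal inline mode the
 * match requires does not exceed the mode the eswitch is configured for.
 */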
static int parse_cls_flower(struct mlx5e_priv *priv,
                            struct mlx5e_tc_flow *flow,
                            struct mlx5_flow_spec *spec,
                            struct tc_cls_flower_offload *f)
{
        struct mlx5_core_dev *dev = priv->mdev;
        struct mlx5_eswitch *esw = dev->priv.eswitch;
        struct mlx5_eswitch_rep *rep = priv->ppriv;
        u8 min_inline;
        int err;

        err = __parse_cls_flower(priv, spec, f, &min_inline);

        if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
            rep->vport != FDB_UPLINK_VPORT) {
                if (min_inline > esw->offloads.inline_mode) {
                        netdev_warn(priv->netdev,
                                    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
                                    min_inline, esw->offloads.inline_mode);
                        return -EOPNOTSUPP;
                }
        }

        return err;
}

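/* Scratch area for parsing pedit actions: one instance holds the masks
 * (or values) for every header type a pedit key may touch, and
 * pedit_header_offsets[] maps a TCA_PEDIT_KEY_EX_HDR_TYPE_* to the
 * corresponding member.
 */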
struct pedit_headers {
        struct ethhdr  eth;
        struct iphdr   ip4;
        struct ipv6hdr ip6;
        struct tcphdr  tcp;
        struct udphdr  udp;
};

static int pedit_header_offsets[] = {
        [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
        [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
        [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
        [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
        [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
};

#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])

static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
                         struct pedit_headers *masks,
                         struct pedit_headers *vals)
{
        u32 *curr_pmask, *curr_pval;

        if (hdr_type >= __PEDIT_HDR_TYPE_MAX)
                goto out_err;

        curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset);
        curr_pval  = (u32 *)(pedit_header(vals, hdr_type) + offset);

        if (*curr_pmask & mask)  /* disallow acting twice on the same location */
                goto out_err;

        *curr_pmask |= mask;
        *curr_pval  |= (val & mask);

        return 0;

out_err:
        return -EOPNOTSUPP;
}

struct mlx5_fields {
        u8  field;
        u8  size;
        u32 offset;
};

static struct mlx5_fields fields[] = {
        {MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_dest[0])},
        {MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0,  2, offsetof(struct pedit_headers, eth.h_dest[4])},
        {MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_source[0])},
        {MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0,  2, offsetof(struct pedit_headers, eth.h_source[4])},
        {MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE,  2, offsetof(struct pedit_headers, eth.h_proto)},

        {MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)},
        {MLX5_ACTION_IN_FIELD_OUT_IP_TTL,  1, offsetof(struct pedit_headers, ip4.ttl)},
        {MLX5_ACTION_IN_FIELD_OUT_SIPV4,   4, offsetof(struct pedit_headers, ip4.saddr)},
        {MLX5_ACTION_IN_FIELD_OUT_DIPV4,   4, offsetof(struct pedit_headers, ip4.daddr)},

        {MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[0])},
        {MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64,  4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[1])},
        {MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32,  4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[2])},
        {MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0,   4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[3])},
        {MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[0])},
        {MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64,  4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[1])},
        {MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32,  4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[2])},
        {MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0,   4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[3])},

        {MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT, 2, offsetof(struct pedit_headers, tcp.source)},
        {MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT, 2, offsetof(struct pedit_headers, tcp.dest)},
        {MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS, 1, offsetof(struct pedit_headers, tcp.ack_seq) + 5},

        {MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT, 2, offsetof(struct pedit_headers, udp.source)},
        {MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT, 2, offsetof(struct pedit_headers, udp.dest)},
};

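/* Worked example (illustrative): a pedit that sets all 6 bytes of
 * eth.h_dest is split by the table above into two HW actions, a 32-bit
 * SET of OUT_DMAC_47_16 and a 16-bit SET of OUT_DMAC_15_0; both masks are
 * full-field, which is the only form offload_pedit_fields() accepts.
 */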
/* On input, parse_attr->num_mod_hdr_actions holds the maximum number of HW
 * actions that may be parsed from the SW pedit action. On success, it is
 * updated to the number of HW actions actually parsed.
 */
static int offload_pedit_fields(struct pedit_headers *masks,
                                struct pedit_headers *vals,
                                struct mlx5e_tc_flow_parse_attr *parse_attr)
{
        struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
        int i, action_size, nactions, max_actions, first, last;
        void *s_masks_p, *a_masks_p, *vals_p;
        u32 s_mask, a_mask, val;
        struct mlx5_fields *f;
        u8 cmd, field_bsize;
        unsigned long mask;
        void *action;

        set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET];
        add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD];
        set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET];
        add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD];

        action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
        action = parse_attr->mod_hdr_actions;
        max_actions = parse_attr->num_mod_hdr_actions;
        nactions = 0;

        for (i = 0; i < ARRAY_SIZE(fields); i++) {
                f = &fields[i];
                /* avoid seeing bits set from previous iterations */
                s_mask = a_mask = mask = val = 0;

                s_masks_p = (void *)set_masks + f->offset;
                a_masks_p = (void *)add_masks + f->offset;

                memcpy(&s_mask, s_masks_p, f->size);
                memcpy(&a_mask, a_masks_p, f->size);

                if (!s_mask && !a_mask) /* nothing to offload here */
                        continue;

                if (s_mask && a_mask) {
                        printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
                        return -EOPNOTSUPP;
                }

                if (nactions == max_actions) {
                        printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
                        return -EOPNOTSUPP;
                }

                if (s_mask) {
                        cmd  = MLX5_ACTION_TYPE_SET;
                        mask = s_mask;
                        vals_p = (void *)set_vals + f->offset;
                        /* clear to denote we consumed this field */
                        memset(s_masks_p, 0, f->size);
                } else {
                        cmd  = MLX5_ACTION_TYPE_ADD;
                        mask = a_mask;
                        vals_p = (void *)add_vals + f->offset;
                        /* clear to denote we consumed this field */
                        memset(a_masks_p, 0, f->size);
                }

                memcpy(&val, vals_p, f->size);

                field_bsize = f->size * BITS_PER_BYTE;
                first = find_first_bit(&mask, field_bsize);
                last  = find_last_bit(&mask, field_bsize);
                if (first > 0 || last != (field_bsize - 1)) {
                        printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n",
                               mask);
                        return -EOPNOTSUPP;
                }

                MLX5_SET(set_action_in, action, action_type, cmd);
                MLX5_SET(set_action_in, action, field, f->field);

                if (cmd == MLX5_ACTION_TYPE_SET) {
                        MLX5_SET(set_action_in, action, offset, 0);
                        /* length is num of bits to be written, zero means length of 32 */
                        MLX5_SET(set_action_in, action, length, field_bsize);
                }

                if (field_bsize == 32)
                        MLX5_SET(set_action_in, action, data, ntohl(val));
                else if (field_bsize == 16)
                        MLX5_SET(set_action_in, action, data, ntohs(val));
                else if (field_bsize == 8)
                        MLX5_SET(set_action_in, action, data, val);

                action += action_size;
                nactions++;
        }

        parse_attr->num_mod_hdr_actions = nactions;
        return 0;
}

static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
                                 const struct tc_action *a, int namespace,
                                 struct mlx5e_tc_flow_parse_attr *parse_attr)
{
        int nkeys, action_size, max_actions;

        nkeys = tcf_pedit_nkeys(a);
        action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);

        if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
                max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions);
        else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
                max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions);

        /* a single 32-bit pedit SW key can expand to as many as 16 HW actions */
        max_actions = min(max_actions, nkeys * 16);

        parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
        if (!parse_attr->mod_hdr_actions)
                return -ENOMEM;

        parse_attr->num_mod_hdr_actions = max_actions;
        return 0;
}

static const struct pedit_headers zero_masks = {};

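/* Parse a TC pedit action into mod-header HW actions in three steps:
 * collect the requested masks/values per pedit command (SET/ADD) into the
 * scratch headers, allocate a HW action buffer sized by device caps, then
 * translate field by field with offload_pedit_fields(). Any mask bits
 * left unconsumed indicate an unsupported field and fail the offload.
 *
 * Illustrative example of a filter exercising this path (device names
 * and match are assumptions):
 *   tc filter add dev eth0 parent ffff: protocol ip flower ip_proto tcp \
 *      action pedit ex munge ip ttl set 64 pipe \
 *      action mirred egress redirect dev eth1
 */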
static int parse_tc_pedit_action(struct mlx5e_priv *priv,
                                 const struct tc_action *a, int namespace,
                                 struct mlx5e_tc_flow_parse_attr *parse_attr)
{
        struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks;
        int nkeys, i, err = -EOPNOTSUPP;
        u32 mask, val, offset;
        u8 cmd, htype;

        nkeys = tcf_pedit_nkeys(a);

        memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
        memset(vals,  0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);

        for (i = 0; i < nkeys; i++) {
                htype = tcf_pedit_htype(a, i);
                cmd = tcf_pedit_cmd(a, i);
                err = -EOPNOTSUPP; /* can't be all optimistic */

                if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
                        printk(KERN_WARNING "mlx5: legacy pedit isn't offloaded\n");
                        goto out_err;
                }

                if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
                        printk(KERN_WARNING "mlx5: pedit cmd %d isn't offloaded\n", cmd);
                        goto out_err;
                }

                mask = tcf_pedit_mask(a, i);
                val = tcf_pedit_val(a, i);
                offset = tcf_pedit_offset(a, i);

                err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]);
                if (err)
                        goto out_err;
        }

        err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr);
        if (err)
                goto out_err;

        err = offload_pedit_fields(masks, vals, parse_attr);
        if (err < 0)
                goto out_dealloc_parsed_actions;

        for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
                cmd_masks = &masks[cmd];
                if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
                        printk(KERN_WARNING "mlx5: attempt to offload an unsupported field (cmd %d)\n",
                               cmd);
                        print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
                                       16, 1, cmd_masks, sizeof(zero_masks), true);
                        err = -EOPNOTSUPP;
                        goto out_dealloc_parsed_actions;
                }
        }

        return 0;

out_dealloc_parsed_actions:
        kfree(parse_attr->mod_hdr_actions);
out_err:
        return err;
}

static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                                struct mlx5e_tc_flow_parse_attr *parse_attr,
                                struct mlx5e_tc_flow *flow)
{
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        const struct tc_action *a;
        LIST_HEAD(actions);

        if (tc_no_actions(exts))
                return -EINVAL;

        attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
        attr->action = 0;

        tcf_exts_to_list(exts, &actions);
        list_for_each_entry(a, &actions, list) {
                /* Only support a single action per rule */
                if (attr->action)
                        return -EINVAL;

                if (is_tcf_gact_shot(a)) {
                        attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
                        if (MLX5_CAP_FLOWTABLE(priv->mdev,
                                               flow_table_properties_nic_receive.flow_counter))
                                attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        continue;
                }

                if (is_tcf_skbedit_mark(a)) {
                        u32 mark = tcf_skbedit_mark(a);

                        if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
                                netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n",
                                            mark);
                                return -EINVAL;
                        }

                        attr->flow_tag = mark;
                        attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
                        continue;
                }

                return -EINVAL;
        }

        return 0;
}

static inline int cmp_encap_info(struct ip_tunnel_key *a,
                                 struct ip_tunnel_key *b)
{
        return memcmp(a, b, sizeof(*a));
}

static inline int hash_encap_info(struct ip_tunnel_key *key)
{
        return jhash(key, sizeof(*key), 0);
}

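/* Resolve the IPv4 route for the tunnel destination: pick the egress
 * netdev (falling back to the eswitch uplink when the route leaves this
 * HW eswitch), derive the TTL from the dst entry and look up the
 * neighbour that supplies the encap destination MAC.
 */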
static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
                                   struct net_device *mirred_dev,
                                   struct net_device **out_dev,
                                   struct flowi4 *fl4,
                                   struct neighbour **out_n,
                                   int *out_ttl)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct rtable *rt;
        struct neighbour *n = NULL;

#if IS_ENABLED(CONFIG_INET)
        int ret;

        rt = ip_route_output_key(dev_net(mirred_dev), fl4);
        ret = PTR_ERR_OR_ZERO(rt);
        if (ret)
                return ret;
#else
        return -EOPNOTSUPP;
#endif
        /* if the egress device isn't on the same HW e-switch, we use the uplink */
        if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
                *out_dev = mlx5_eswitch_get_uplink_netdev(esw);
        else
                *out_dev = rt->dst.dev;

        *out_ttl = ip4_dst_hoplimit(&rt->dst);
        n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
        ip_rt_put(rt);
        if (!n)
                return -ENOMEM;

        *out_n = n;
        return 0;
}

static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
                                   struct net_device *mirred_dev,
                                   struct net_device **out_dev,
                                   struct flowi6 *fl6,
                                   struct neighbour **out_n,
                                   int *out_ttl)
{
        struct neighbour *n = NULL;
        struct dst_entry *dst;

#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        int ret;

        dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6);
        ret = dst->error;
        if (ret) {
                dst_release(dst);
                return ret;
        }

        *out_ttl = ip6_dst_hoplimit(dst);

        /* if the egress device isn't on the same HW e-switch, we use the uplink */
        if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
                *out_dev = mlx5_eswitch_get_uplink_netdev(esw);
        else
                *out_dev = dst->dev;
#else
        return -EOPNOTSUPP;
#endif

        n = dst_neigh_lookup(dst, &fl6->daddr);
        dst_release(dst);
        if (!n)
                return -ENOMEM;

        *out_n = n;
        return 0;
}

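/* Build a static Ethernet/IPv4/UDP/VXLAN encapsulation header in buf.
 * Total size is ETH_HLEN (14) + sizeof(struct iphdr) (20) + VXLAN_HLEN
 * (8 UDP + 8 VXLAN) = 50 bytes. Length and checksum fields are left
 * zeroed; the HW fills them per packet.
 */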
static int gen_vxlan_header_ipv4(struct net_device *out_dev,
                                 char buf[],
                                 unsigned char h_dest[ETH_ALEN],
                                 int ttl,
                                 __be32 daddr,
                                 __be32 saddr,
                                 __be16 udp_dst_port,
                                 __be32 vx_vni)
{
        int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN;
        struct ethhdr *eth = (struct ethhdr *)buf;
        struct iphdr  *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
        struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
        struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));

        memset(buf, 0, encap_size);

        ether_addr_copy(eth->h_dest, h_dest);
        ether_addr_copy(eth->h_source, out_dev->dev_addr);
        eth->h_proto = htons(ETH_P_IP);

        ip->daddr = daddr;
        ip->saddr = saddr;

        ip->ttl = ttl;
        ip->protocol = IPPROTO_UDP;
        ip->version = 0x4;
        ip->ihl = 0x5;

        udp->dest = udp_dst_port;
        vxh->vx_flags = VXLAN_HF_VNI;
        vxh->vx_vni = vxlan_vni_field(vx_vni);

        return encap_size;
}

static int gen_vxlan_header_ipv6(struct net_device *out_dev,
                                 char buf[],
                                 unsigned char h_dest[ETH_ALEN],
                                 int ttl,
                                 struct in6_addr *daddr,
                                 struct in6_addr *saddr,
                                 __be16 udp_dst_port,
                                 __be32 vx_vni)
{
        int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN;
        struct ethhdr *eth = (struct ethhdr *)buf;
        struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
        struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
        struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));

        memset(buf, 0, encap_size);

        ether_addr_copy(eth->h_dest, h_dest);
        ether_addr_copy(eth->h_source, out_dev->dev_addr);
        eth->h_proto = htons(ETH_P_IPV6);

        ip6_flow_hdr(ip6h, 0, 0);
        /* the HW fills up ipv6 payload len */
        ip6h->nexthdr     = IPPROTO_UDP;
        ip6h->hop_limit   = ttl;
        ip6h->daddr       = *daddr;
        ip6h->saddr       = *saddr;

        udp->dest = udp_dst_port;
        vxh->vx_flags = VXLAN_HF_VNI;
        vxh->vx_vni = vxlan_vni_field(vx_vni);

        return encap_size;
}

1159 static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
1160                                           struct net_device *mirred_dev,
1161                                           struct mlx5_encap_entry *e,
1162                                           struct net_device **out_dev)
1163 {
1164         int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
1165         struct ip_tunnel_key *tun_key = &e->tun_info.key;
1166         int encap_size, ttl, err;
1167         struct neighbour *n = NULL;
1168         struct flowi4 fl4 = {};
1169         char *encap_header;
1170
1171         encap_header = kzalloc(max_encap_size, GFP_KERNEL);
1172         if (!encap_header)
1173                 return -ENOMEM;
1174
1175         switch (e->tunnel_type) {
1176         case MLX5_HEADER_TYPE_VXLAN:
1177                 fl4.flowi4_proto = IPPROTO_UDP;
1178                 fl4.fl4_dport = tun_key->tp_dst;
1179                 break;
1180         default:
1181                 err = -EOPNOTSUPP;
1182                 goto out;
1183         }
1184         fl4.flowi4_tos = tun_key->tos;
1185         fl4.daddr = tun_key->u.ipv4.dst;
1186         fl4.saddr = tun_key->u.ipv4.src;
1187
1188         err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev,
1189                                       &fl4, &n, &ttl);
1190         if (err)
1191                 goto out;
1192
1193         if (!(n->nud_state & NUD_VALID)) {
1194                 pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr);
1195                 err = -EOPNOTSUPP;
1196                 goto out;
1197         }
1198
1199         e->n = n;
1200         e->out_dev = *out_dev;
1201
1202         neigh_ha_snapshot(e->h_dest, n, *out_dev);
1203
1204         switch (e->tunnel_type) {
1205         case MLX5_HEADER_TYPE_VXLAN:
1206                 encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
1207                                                    e->h_dest, ttl,
1208                                                    fl4.daddr,
1209                                                    fl4.saddr, tun_key->tp_dst,
1210                                                    tunnel_id_to_key32(tun_key->tun_id));
1211                 break;
1212         default:
1213                 err = -EOPNOTSUPP;
1214                 goto out;
1215         }
1216
1217         err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
1218                                encap_size, encap_header, &e->encap_id);
1219 out:
1220         if (err && n)
1221                 neigh_release(n);
1222         kfree(encap_header);
1223         return err;
1224 }
1225
1226 static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
1227                                           struct net_device *mirred_dev,
1228                                           struct mlx5_encap_entry *e,
1229                                           struct net_device **out_dev)
1230
1231 {
1232         int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
1233         struct ip_tunnel_key *tun_key = &e->tun_info.key;
1234         int encap_size, err, ttl = 0;
1235         struct neighbour *n = NULL;
1236         struct flowi6 fl6 = {};
1237         char *encap_header;
1238
1239         encap_header = kzalloc(max_encap_size, GFP_KERNEL);
1240         if (!encap_header)
1241                 return -ENOMEM;
1242
1243         switch (e->tunnel_type) {
1244         case MLX5_HEADER_TYPE_VXLAN:
1245                 fl6.flowi6_proto = IPPROTO_UDP;
1246                 fl6.fl6_dport = tun_key->tp_dst;
1247                 break;
1248         default:
1249                 err = -EOPNOTSUPP;
1250                 goto out;
1251         }
1252
1253         fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
1254         fl6.daddr = tun_key->u.ipv6.dst;
1255         fl6.saddr = tun_key->u.ipv6.src;
1256
1257         err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev,
1258                                       &fl6, &n, &ttl);
1259         if (err)
1260                 goto out;
1261
1262         if (!(n->nud_state & NUD_VALID)) {
1263                 pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr);
1264                 err = -EOPNOTSUPP;
1265                 goto out;
1266         }
1267
1268         e->n = n;
1269         e->out_dev = *out_dev;
1270
1271         neigh_ha_snapshot(e->h_dest, n, *out_dev);
1272
1273         switch (e->tunnel_type) {
1274         case MLX5_HEADER_TYPE_VXLAN:
1275                 encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header,
1276                                                    e->h_dest, ttl,
1277                                                    &fl6.daddr,
1278                                                    &fl6.saddr, tun_key->tp_dst,
1279                                                    tunnel_id_to_key32(tun_key->tun_id));
1280                 break;
1281         default:
1282                 err = -EOPNOTSUPP;
1283                 goto out;
1284         }
1285
1286         err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
1287                                encap_size, encap_header, &e->encap_id);
1288 out:
1289         if (err && n)
1290                 neigh_release(n);
1291         kfree(encap_header);
1292         return err;
1293 }
1294
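/* Find or create the encap entry for this tunnel and attach it to the
 * eswitch flow attributes. Entries live in esw->offloads.encap_tbl and
 * are shared by all flows that use the same tunnel key.
 */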
1295 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
1296                               struct ip_tunnel_info *tun_info,
1297                               struct net_device *mirred_dev,
1298                               struct mlx5_esw_flow_attr *attr)
1299 {
1300         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1301         struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
1302         struct mlx5e_priv *up_priv = netdev_priv(up_dev);
1303         unsigned short family = ip_tunnel_info_af(tun_info);
1304         struct ip_tunnel_key *key = &tun_info->key;
1305         struct mlx5_encap_entry *e;
1306         struct net_device *out_dev;
1307         int tunnel_type, err = -EOPNOTSUPP;
1308         uintptr_t hash_key;
1309         bool found = false;
1310
	/* the udp dst port must be given and the udp src port must not be set */
	if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)) ||
	    memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
		netdev_warn(priv->netdev,
			    "must set udp dst port and not set udp src port\n");
		return -EOPNOTSUPP;
	}
1322
1323         if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->tp_dst)) &&
1324             MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
1325                 tunnel_type = MLX5_HEADER_TYPE_VXLAN;
1326         } else {
1327                 netdev_warn(priv->netdev,
1328                             "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
1329                 return -EOPNOTSUPP;
1330         }
1331
1332         hash_key = hash_encap_info(key);
1333
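	/* reuse an existing encap entry if one already matches this tunnel key */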
1334         hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
1335                                    encap_hlist, hash_key) {
1336                 if (!cmp_encap_info(&e->tun_info.key, key)) {
1337                         found = true;
1338                         break;
1339                 }
1340         }
1341
1342         if (found) {
1343                 attr->encap = e;
1344                 return 0;
1345         }
1346
1347         e = kzalloc(sizeof(*e), GFP_KERNEL);
1348         if (!e)
1349                 return -ENOMEM;
1350
1351         e->tun_info = *tun_info;
1352         e->tunnel_type = tunnel_type;
1353         INIT_LIST_HEAD(&e->flows);
1354
1355         if (family == AF_INET)
1356                 err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
1357         else if (family == AF_INET6)
1358                 err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev);
1359
1360         if (err)
1361                 goto out_err;
1362
1363         attr->encap = e;
1364         hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
1365
1366         return err;
1367
1368 out_err:
1369         kfree(e);
1370         return err;
1371 }
1372
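/* Translate the tc actions of an eswitch (FDB) flow into mlx5_esw_flow_attr.
 * Supported actions: drop, mirred redirect (optionally preceded by a tunnel
 * set for encap), vlan push/pop and tunnel release (decap).
 */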
1373 static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
1374                                 struct mlx5e_tc_flow *flow)
1375 {
1376         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1377         struct ip_tunnel_info *info = NULL;
1378         const struct tc_action *a;
1379         LIST_HEAD(actions);
1380         bool encap = false;
1381         int err;
1382
1383         if (tc_no_actions(exts))
1384                 return -EINVAL;
1385
1386         memset(attr, 0, sizeof(*attr));
1387         attr->in_rep = priv->ppriv;
1388
1389         tcf_exts_to_list(exts, &actions);
1390         list_for_each_entry(a, &actions, list) {
1391                 if (is_tcf_gact_shot(a)) {
1392                         attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
1393                                         MLX5_FLOW_CONTEXT_ACTION_COUNT;
1394                         continue;
1395                 }
1396
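		/* redirect: either forward to a port on the same eswitch, or,
		 * if a tunnel set action was seen earlier, encap towards the
		 * tunnel device
		 */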
1397                 if (is_tcf_mirred_egress_redirect(a)) {
1398                         int ifindex = tcf_mirred_ifindex(a);
1399                         struct net_device *out_dev;
1400                         struct mlx5e_priv *out_priv;
1401
			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
			/* the ifindex from tc may no longer resolve to a device */
			if (!out_dev)
				return -ENODEV;

1404                         if (switchdev_port_same_parent_id(priv->netdev,
1405                                                           out_dev)) {
1406                                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1407                                         MLX5_FLOW_CONTEXT_ACTION_COUNT;
1408                                 out_priv = netdev_priv(out_dev);
1409                                 attr->out_rep = out_priv->ppriv;
1410                         } else if (encap) {
1411                                 err = mlx5e_attach_encap(priv, info,
1412                                                          out_dev, attr);
1413                                 if (err)
1414                                         return err;
1415                                 list_add(&flow->encap, &attr->encap->flows);
1416                                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
1417                                         MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1418                                         MLX5_FLOW_CONTEXT_ACTION_COUNT;
1419                                 out_priv = netdev_priv(attr->encap->out_dev);
1420                                 attr->out_rep = out_priv->ppriv;
1421                         } else {
1422                                 pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
1423                                        priv->netdev->name, out_dev->name);
1424                                 return -EINVAL;
1425                         }
1426                         continue;
1427                 }
1428
1429                 if (is_tcf_tunnel_set(a)) {
1430                         info = tcf_tunnel_info(a);
1431                         if (info)
1432                                 encap = true;
1433                         else
1434                                 return -EOPNOTSUPP;
1435                         continue;
1436                 }
1437
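		/* only 802.1Q vlan push and pop are offloaded; vlan modify
		 * is not supported
		 */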
1438                 if (is_tcf_vlan(a)) {
1439                         if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
1440                                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
1441                         } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
1442                                 if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
1443                                         return -EOPNOTSUPP;
1444
1445                                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
1446                                 attr->vlan = tcf_vlan_push_vid(a);
1447                         } else { /* action is TCA_VLAN_ACT_MODIFY */
1448                                 return -EOPNOTSUPP;
1449                         }
1450                         continue;
1451                 }
1452
1453                 if (is_tcf_tunnel_release(a)) {
1454                         attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
1455                         continue;
1456                 }
1457
1458                 return -EINVAL;
1459         }
1460         return 0;
1461 }
1462
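/* Entry point for flower classifier offload: parse the match and actions,
 * install the rule in the NIC or eswitch tables, and index the flow by its
 * tc cookie.
 */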
1463 int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
1464                            struct tc_cls_flower_offload *f)
1465 {
1466         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1467         struct mlx5e_tc_flow_parse_attr *parse_attr;
1468         struct mlx5e_tc_table *tc = &priv->fs.tc;
1469         struct mlx5e_tc_flow *flow;
1470         int attr_size, err = 0;
1471         u8 flow_flags = 0;
1472
1473         if (esw && esw->mode == SRIOV_OFFLOADS) {
1474                 flow_flags = MLX5E_TC_FLOW_ESWITCH;
1475                 attr_size  = sizeof(struct mlx5_esw_flow_attr);
1476         } else {
1477                 flow_flags = MLX5E_TC_FLOW_NIC;
1478                 attr_size  = sizeof(struct mlx5_nic_flow_attr);
1479         }
1480
1481         flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
1482         parse_attr = mlx5_vzalloc(sizeof(*parse_attr));
1483         if (!parse_attr || !flow) {
1484                 err = -ENOMEM;
1485                 goto err_free;
1486         }
1487
1488         flow->cookie = f->cookie;
1489         flow->flags = flow_flags;
1490
1491         err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
1492         if (err < 0)
1493                 goto err_free;
1494
1495         if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
1496                 err = parse_tc_fdb_actions(priv, f->exts, flow);
1497                 if (err < 0)
1498                         goto err_free;
1499                 flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
1500         } else {
1501                 err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
1502                 if (err < 0)
1503                         goto err_free;
1504                 flow->rule = mlx5e_tc_add_nic_flow(priv, parse_attr, flow);
1505         }
1506
1507         if (IS_ERR(flow->rule)) {
1508                 err = PTR_ERR(flow->rule);
1509                 goto err_free;
1510         }
1511
1512         err = rhashtable_insert_fast(&tc->ht, &flow->node,
1513                                      tc->ht_params);
1514         if (err)
1515                 goto err_del_rule;
1516
1517         goto out;
1518
1519 err_del_rule:
1520         mlx5e_tc_del_flow(priv, flow);
1521
1522 err_free:
1523         kfree(flow);
1524 out:
1525         kvfree(parse_attr);
1526         return err;
1527 }
1528
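/* tc identifies the flow by its cookie: look it up, remove the hardware
 * rule and free the bookkeeping
 */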
1529 int mlx5e_delete_flower(struct mlx5e_priv *priv,
1530                         struct tc_cls_flower_offload *f)
1531 {
1532         struct mlx5e_tc_flow *flow;
1533         struct mlx5e_tc_table *tc = &priv->fs.tc;
1534
1535         flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
1536                                       tc->ht_params);
1537         if (!flow)
1538                 return -EINVAL;
1539
1540         rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
1541
1542         mlx5e_tc_del_flow(priv, flow);
1543
1545         kfree(flow);
1546
1547         return 0;
1548 }
1549
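/* Report hardware counters back to tc. Values come from the driver's
 * counter cache (mlx5_fc_query_cached()), so no firmware command is
 * issued per query.
 */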
1550 int mlx5e_stats_flower(struct mlx5e_priv *priv,
1551                        struct tc_cls_flower_offload *f)
1552 {
1553         struct mlx5e_tc_table *tc = &priv->fs.tc;
1554         struct mlx5e_tc_flow *flow;
1555         struct tc_action *a;
1556         struct mlx5_fc *counter;
1557         LIST_HEAD(actions);
1558         u64 bytes;
1559         u64 packets;
1560         u64 lastuse;
1561
1562         flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
1563                                       tc->ht_params);
1564         if (!flow)
1565                 return -EINVAL;
1566
1567         counter = mlx5_flow_rule_counter(flow->rule);
1568         if (!counter)
1569                 return 0;
1570
1571         mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
1572
1573         preempt_disable();
1574
1575         tcf_exts_to_list(f->exts, &actions);
1576         list_for_each_entry(a, &actions, list)
1577                 tcf_action_stats_update(a, bytes, packets, lastuse);
1578
1579         preempt_enable();
1580
1581         return 0;
1582 }
1583
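/* flows are keyed by the tc cookie (see mlx5e_configure_flower()) */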
1584 static const struct rhashtable_params mlx5e_tc_flow_ht_params = {
1585         .head_offset = offsetof(struct mlx5e_tc_flow, node),
1586         .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
1587         .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
1588         .automatic_shrinking = true,
1589 };
1590
1591 int mlx5e_tc_init(struct mlx5e_priv *priv)
1592 {
1593         struct mlx5e_tc_table *tc = &priv->fs.tc;
1594
1595         tc->ht_params = mlx5e_tc_flow_ht_params;
1596         return rhashtable_init(&tc->ht, &tc->ht_params);
1597 }
1598
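/* rhashtable_free_and_destroy() callback: tear down any flows still
 * offloaded when the table goes away
 */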
1599 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
1600 {
1601         struct mlx5e_tc_flow *flow = ptr;
1602         struct mlx5e_priv *priv = arg;
1603
1604         mlx5e_tc_del_flow(priv, flow);
1605         kfree(flow);
1606 }
1607
1608 void mlx5e_tc_cleanup(struct mlx5e_priv *priv)
1609 {
1610         struct mlx5e_tc_table *tc = &priv->fs.tc;
1611
1612         rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv);
1613
1614         if (!IS_ERR_OR_NULL(tc->t)) {
1615                 mlx5_destroy_flow_table(tc->t);
1616                 tc->t = NULL;
1617         }
1618 }