/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <net/switchdev.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
#include <net/vxlan.h>
#include <net/arp.h>
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
#include "eswitch.h"
#include "vxlan.h"
struct mlx5_nic_flow_attr {
	u32 action;
	u32 flow_tag;
	u32 mod_hdr_id;
};

enum {
	MLX5E_TC_FLOW_ESWITCH	= BIT(0),
	MLX5E_TC_FLOW_NIC	= BIT(1),
	MLX5E_TC_FLOW_OFFLOADED	= BIT(2),
};

struct mlx5e_tc_flow {
	struct rhash_head	node;
	u64			cookie;
	u8			flags;
	struct mlx5_flow_handle *rule;
	struct list_head	encap;	/* flows sharing the same encap */
	union {
		struct mlx5_esw_flow_attr esw_attr[0];
		struct mlx5_nic_flow_attr nic_attr[0];
	};
};

struct mlx5e_tc_flow_parse_attr {
	struct mlx5_flow_spec spec;
	int num_mod_hdr_actions;
	void *mod_hdr_actions;
};

enum {
	MLX5_HEADER_TYPE_VXLAN = 0x0,
	MLX5_HEADER_TYPE_NVGRE = 0x1,
};

#define MLX5E_TC_TABLE_NUM_ENTRIES 1024
#define MLX5E_TC_TABLE_NUM_GROUPS 4
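
/* NIC (non-eswitch) offload path: rules parsed from TC are installed
 * into a per-device TC flow table that is created lazily on first use
 * (see below) and destroyed again once the last filter is removed.
 */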
static struct mlx5_flow_handle *
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow_parse_attr *parse_attr,
		      struct mlx5e_tc_flow *flow)
{
	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {
		.action = attr->action,
		.flow_tag = attr->flow_tag,
		.encap_id = 0,
	};
	struct mlx5_fc *counter = NULL;
	struct mlx5_flow_handle *rule;
	bool table_created = false;
	int err;

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest.ft = priv->fs.vlan.ft.t;
	} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		counter = mlx5_fc_create(dev, true);
		if (IS_ERR(counter))
			return ERR_CAST(counter);

		dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest.counter = counter;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5_modify_header_alloc(dev, MLX5_FLOW_NAMESPACE_KERNEL,
					       parse_attr->num_mod_hdr_actions,
					       parse_attr->mod_hdr_actions,
					       &attr->mod_hdr_id);
		flow_act.modify_id = attr->mod_hdr_id;
		kfree(parse_attr->mod_hdr_actions);
		if (err) {
			rule = ERR_PTR(err);
			goto err_create_mod_hdr_id;
		}
	}

	if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
		priv->fs.tc.t =
			mlx5_create_auto_grouped_flow_table(priv->fs.ns,
							    MLX5E_TC_PRIO,
							    MLX5E_TC_TABLE_NUM_ENTRIES,
							    MLX5E_TC_TABLE_NUM_GROUPS,
							    0, 0);
		if (IS_ERR(priv->fs.tc.t)) {
			netdev_err(priv->netdev,
				   "Failed to create tc offload table\n");
			rule = ERR_CAST(priv->fs.tc.t);
			goto err_create_ft;
		}

		table_created = true;
	}

	parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
	rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
				   &flow_act, &dest, 1);
	if (IS_ERR(rule))
		goto err_add_rule;

	return rule;

err_add_rule:
	if (table_created) {
		mlx5_destroy_flow_table(priv->fs.tc.t);
		priv->fs.tc.t = NULL;
	}
err_create_ft:
	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		mlx5_modify_header_dealloc(priv->mdev,
					   attr->mod_hdr_id);
err_create_mod_hdr_id:
	mlx5_fc_destroy(dev, counter);

	return rule;
}
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
	struct mlx5_fc *counter = NULL;

	counter = mlx5_flow_rule_counter(flow->rule);
	mlx5_del_flow_rules(flow->rule);
	mlx5_fc_destroy(priv->mdev, counter);

	if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
		mlx5_destroy_flow_table(priv->fs.tc.t);
		priv->fs.tc.t = NULL;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		mlx5_modify_header_dealloc(priv->mdev,
					   attr->mod_hdr_id);
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
			       struct mlx5e_tc_flow *flow);
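
/* FDB (eswitch) offload path: the rule is installed in the switchdev
 * FDB table after any required VLAN push/pop and modify-header
 * contexts have been set up; the teardown below mirrors setup in
 * reverse order.
 */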
static struct mlx5_flow_handle *
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow_parse_attr *parse_attr,
		      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct mlx5_flow_handle *rule;
	int err;

	err = mlx5_eswitch_add_vlan_action(esw, attr);
	if (err) {
		rule = ERR_PTR(err);
		goto err_add_vlan;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_FDB,
					       parse_attr->num_mod_hdr_actions,
					       parse_attr->mod_hdr_actions,
					       &attr->mod_hdr_id);
		kfree(parse_attr->mod_hdr_actions);
		if (err) {
			rule = ERR_PTR(err);
			goto err_mod_hdr;
		}
	}

	rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
	if (IS_ERR(rule))
		goto err_add_rule;

	return rule;

err_add_rule:
	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		mlx5_modify_header_dealloc(priv->mdev,
					   attr->mod_hdr_id);
err_mod_hdr:
	mlx5_eswitch_del_vlan_action(esw, attr);
err_add_vlan:
	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
		mlx5e_detach_encap(priv, flow);
	return rule;
}
static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;

	if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
		flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
		mlx5_eswitch_del_offloaded_rule(esw, flow->rule, attr);
	}

	mlx5_eswitch_del_vlan_action(esw, attr);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
		mlx5e_detach_encap(priv, flow);
		kvfree(attr->parse_attr);
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		mlx5_modify_header_dealloc(priv->mdev,
					   attr->mod_hdr_id);
}
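
/* Called from the representor neigh-update path once the neighbour an
 * encap entry depends on becomes valid: the cached encapsulation header
 * is pushed to HW and every flow sharing this encap entry is
 * re-offloaded.
 */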
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e)
{
	struct mlx5e_tc_flow *flow;
	int err;

	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
			       e->encap_size, e->encap_header,
			       &e->encap_id);
	if (err) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
			       err);
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, &e->flows, encap) {
		flow->esw_attr->encap_id = e->encap_id;
		flow->rule = mlx5e_tc_add_fdb_flow(priv,
						   flow->esw_attr->parse_attr,
						   flow);
		if (IS_ERR(flow->rule)) {
			err = PTR_ERR(flow->rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}
		flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
	}
}
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e)
{
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;

	list_for_each_entry(flow, &e->flows, encap) {
		if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
			flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
			counter = mlx5_flow_rule_counter(flow->rule);
			mlx5_del_flow_rules(flow->rule);
			mlx5_fc_destroy(priv->mdev, counter);
		}
	}

	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
		mlx5_encap_dealloc(priv->mdev, e->encap_id);
	}
}
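
/* Periodic neigh "used" feedback: walk all encap entries hanging off a
 * neigh hash entry and, if any offloaded flow's HW counter advanced
 * since the last report, poke the neighbour so the stack keeps it
 * reachable.
 */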
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	u64 bytes, packets, lastuse = 0;
	struct mlx5e_tc_flow *flow;
	struct mlx5e_encap_entry *e;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	list_for_each_entry(e, &nhe->encap_list, encap_list) {
		if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
			continue;
		list_for_each_entry(flow, &e->flows, encap) {
			if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
				counter = mlx5_flow_rule_counter(flow->rule);
				mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
	}

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
		if (!n) {
			WARN(1, "The neighbour is already freed\n");
			return;
		}

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
			       struct mlx5e_tc_flow *flow)
{
	struct list_head *next = flow->encap.next;

	list_del(&flow->encap);
	if (list_empty(next)) {
		struct mlx5e_encap_entry *e;

		e = list_entry(next, struct mlx5e_encap_entry, flows);
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_encap_dealloc(priv->mdev, e->encap_id);

		hash_del_rcu(&e->encap_hlist);
		kfree(e->encap_header);
		kfree(e);
	}
}
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
		mlx5e_tc_del_fdb_flow(priv, flow);
	else
		mlx5e_tc_del_nic_flow(priv, flow);
}
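
/* The two helpers below translate the flower tunnel (decap) match into
 * the outer-header part of the mlx5 flow spec: parse_vxlan_attr()
 * matches UDP plus the VXLAN VNI, while parse_tunnel_attr() validates
 * that only a fully-masked, offloaded VXLAN UDP dport is used and adds
 * the outer IP and MAC matches.
 */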
static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
			     struct tc_cls_flower_offload *f)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    misc_parameters);
	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    misc_parameters);

	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
		struct flow_dissector_key_keyid *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_KEYID,
						  f->key);
		struct flow_dissector_key_keyid *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_KEYID,
						  f->mask);
		MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
			 be32_to_cpu(mask->keyid));
		MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
			 be32_to_cpu(key->keyid));
	}
}
static int parse_tunnel_attr(struct mlx5e_priv *priv,
			     struct mlx5_flow_spec *spec,
			     struct tc_cls_flower_offload *f)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);

	struct flow_dissector_key_control *enc_control =
		skb_flow_dissector_target(f->dissector,
					  FLOW_DISSECTOR_KEY_ENC_CONTROL,
					  f->key);

	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
		struct flow_dissector_key_ports *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_PORTS,
						  f->key);
		struct flow_dissector_key_ports *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_PORTS,
						  f->mask);
		struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
		struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
		struct mlx5e_priv *up_priv = netdev_priv(up_dev);

		/* Full udp dst port must be given */
		if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
			goto vxlan_match_offload_err;

		if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->dst)) &&
		    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
			parse_vxlan_attr(spec, f);
		else {
			netdev_warn(priv->netdev,
				    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
			return -EOPNOTSUPP;
		}

		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 udp_dport, ntohs(mask->dst));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 udp_dport, ntohs(key->dst));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 udp_sport, ntohs(mask->src));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 udp_sport, ntohs(key->src));
	} else { /* udp dst port must be given */
vxlan_match_offload_err:
		netdev_warn(priv->netdev,
			    "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
		return -EOPNOTSUPP;
	}
	if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_dissector_key_ipv4_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv4_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
						  f->mask);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
			 ntohl(mask->src));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
			 ntohl(key->src));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(mask->dst));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
			 ntohl(key->dst));

		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
	} else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_dissector_key_ipv6_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv6_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
						  f->mask);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));

		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
	}

	/* Enforce DMAC when offloading incoming tunneled flows.
	 * Flow counters require a match on the DMAC.
	 */
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
	ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				     dmac_47_16), priv->netdev->dev_addr);

	/* let software handle IP fragments */
	MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);

	return 0;
}
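
/* Build the mlx5 match spec from the flower dissector keys and compute
 * the minimal inline mode the match requires (L2/IP/TCP_UDP); the
 * caller later checks that value against the eswitch inline-mode
 * configuration.
 */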
static int __parse_cls_flower(struct mlx5e_priv *priv,
			      struct mlx5_flow_spec *spec,
			      struct tc_cls_flower_offload *f,
			      u8 *min_inline)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	u16 addr_type = 0;
	u8 ip_proto = 0;

	*min_inline = MLX5_INLINE_MODE_L2;

	if (f->dissector->used_keys &
	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
	      BIT(FLOW_DISSECTOR_KEY_TCP) |
	      BIT(FLOW_DISSECTOR_KEY_IP))) {
		netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
			    f->dissector->used_keys);
		return -EOPNOTSUPP;
	}

	if ((dissector_uses_key(f->dissector,
				FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
	    dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_dissector_key_control *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
						  f->key);
		switch (key->addr_type) {
		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
			if (parse_tunnel_attr(priv, spec, f))
				return -EOPNOTSUPP;
			break;
		default:
			return -EOPNOTSUPP;
		}

		/* In decap flow, header pointers should point to the inner
		 * headers; outer headers were already set by parse_tunnel_attr
		 */
		headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
					 inner_headers);
		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
					 inner_headers);
	}
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_dissector_key_control *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_CONTROL,
						  f->key);

		struct flow_dissector_key_control *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_CONTROL,
						  f->mask);
		addr_type = key->addr_type;

		if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
				 key->flags & FLOW_DIS_IS_FRAGMENT);

			/* the HW doesn't need L3 inline to match on frag=no */
			if (key->flags & FLOW_DIS_IS_FRAGMENT)
				*min_inline = MLX5_INLINE_MODE_IP;
		}
	}
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_dissector_key_basic *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_BASIC,
						  f->key);
		struct flow_dissector_key_basic *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_BASIC,
						  f->mask);
		ip_proto = key->ip_proto;

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
			 ntohs(mask->n_proto));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
			 ntohs(key->n_proto));

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 key->ip_proto);

		if (mask->ip_proto)
			*min_inline = MLX5_INLINE_MODE_IP;
	}
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_dissector_key_eth_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
						  f->key);
		struct flow_dissector_key_eth_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
						  f->mask);

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     dmac_47_16),
				mask->dst);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     dmac_47_16),
				key->dst);

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     smac_47_16),
				mask->src);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     smac_47_16),
				key->src);
	}
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_dissector_key_vlan *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_VLAN,
						  f->key);
		struct flow_dissector_key_vlan *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_VLAN,
						  f->mask);
		if (mask->vlan_id || mask->vlan_priority) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
		}
	}
	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_dissector_key_ipv4_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv4_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
						  f->mask);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &mask->src, sizeof(mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &key->src, sizeof(key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &mask->dst, sizeof(mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &key->dst, sizeof(key->dst));

		if (mask->src || mask->dst)
			*min_inline = MLX5_INLINE_MODE_IP;
	}
	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_dissector_key_ipv6_addrs *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
						  f->key);
		struct flow_dissector_key_ipv6_addrs *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
						  f->mask);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &mask->src, sizeof(mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &key->src, sizeof(key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &mask->dst, sizeof(mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &key->dst, sizeof(key->dst));

		if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
		    ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
			*min_inline = MLX5_INLINE_MODE_IP;
	}
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_dissector_key_ports *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_PORTS,
						  f->key);
		struct flow_dissector_key_ports *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_PORTS,
						  f->mask);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(key->dst));
			break;

		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(key->dst));
			break;
		default:
			netdev_err(priv->netdev,
				   "Only UDP and TCP transports are supported\n");
			return -EINVAL;
		}

		if (mask->src || mask->dst)
			*min_inline = MLX5_INLINE_MODE_TCP_UDP;
	}
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) {
		struct flow_dissector_key_ip *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IP,
						  f->key);
		struct flow_dissector_key_ip *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_IP,
						  f->mask);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2);

		if (mask->tos)
			*min_inline = MLX5_INLINE_MODE_IP;

		if (mask->ttl) /* currently not supported */
			return -EOPNOTSUPP;
	}
	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_dissector_key_tcp *key =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_TCP,
						  f->key);
		struct flow_dissector_key_tcp *mask =
			skb_flow_dissector_target(f->dissector,
						  FLOW_DISSECTOR_KEY_TCP,
						  f->mask);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
			 ntohs(mask->flags));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
			 ntohs(key->flags));

		if (mask->flags)
			*min_inline = MLX5_INLINE_MODE_TCP_UDP;
	}

	return 0;
}
static int parse_cls_flower(struct mlx5e_priv *priv,
			    struct mlx5e_tc_flow *flow,
			    struct mlx5_flow_spec *spec,
			    struct tc_cls_flower_offload *f)
{
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch_rep *rep;
	u8 min_inline;
	int err;

	err = __parse_cls_flower(priv, spec, f, &min_inline);

	if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
		rep = rpriv->rep;
		if (rep->vport != FDB_UPLINK_VPORT &&
		    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
		    esw->offloads.inline_mode < min_inline)) {
			netdev_warn(priv->netdev,
				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
				    min_inline, esw->offloads.inline_mode);
			return -EOPNOTSUPP;
		}
	}

	return err;
}
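
/* pedit offload: SET/ADD keys are first accumulated per header type in
 * a shadow copy of the packet headers (struct pedit_headers below), and
 * then translated field by field into mlx5 modify-header actions.
 */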
struct pedit_headers {
	struct ethhdr  eth;
	struct iphdr   ip4;
	struct ipv6hdr ip6;
	struct tcphdr  tcp;
	struct udphdr  udp;
};

static int pedit_header_offsets[] = {
	[TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
	[TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
	[TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
	[TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
	[TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
};

#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
			 struct pedit_headers *masks,
			 struct pedit_headers *vals)
{
	u32 *curr_pmask, *curr_pval;

	if (hdr_type >= __PEDIT_HDR_TYPE_MAX)
		goto out_err;

	curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset);
	curr_pval  = (u32 *)(pedit_header(vals, hdr_type) + offset);

	if (*curr_pmask & mask)  /* disallow acting twice on the same location */
		goto out_err;

	*curr_pmask |= mask;
	*curr_pval  |= (val & mask);

	return 0;

out_err:
	return -EOPNOTSUPP;
}
struct mlx5_fields {
	u8  field;
	u8  size;
	u32 offset;
};

static struct mlx5_fields fields[] = {
	{MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_dest[0])},
	{MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0,  2, offsetof(struct pedit_headers, eth.h_dest[4])},
	{MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_source[0])},
	{MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0,  2, offsetof(struct pedit_headers, eth.h_source[4])},
	{MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE,  2, offsetof(struct pedit_headers, eth.h_proto)},

	{MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)},
	{MLX5_ACTION_IN_FIELD_OUT_IP_TTL,  1, offsetof(struct pedit_headers, ip4.ttl)},
	{MLX5_ACTION_IN_FIELD_OUT_SIPV4,   4, offsetof(struct pedit_headers, ip4.saddr)},
	{MLX5_ACTION_IN_FIELD_OUT_DIPV4,   4, offsetof(struct pedit_headers, ip4.daddr)},

	{MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[0])},
	{MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64,  4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[1])},
	{MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32,  4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[2])},
	{MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0,   4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[3])},
	{MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[0])},
	{MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64,  4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[1])},
	{MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32,  4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[2])},
	{MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0,   4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[3])},

	{MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT, 2, offsetof(struct pedit_headers, tcp.source)},
	{MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT, 2, offsetof(struct pedit_headers, tcp.dest)},
	{MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS, 1, offsetof(struct pedit_headers, tcp.ack_seq) + 5},

	{MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT, 2, offsetof(struct pedit_headers, udp.source)},
	{MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT, 2, offsetof(struct pedit_headers, udp.dest)},
};
/* On input, parse_attr->num_mod_hdr_actions tells how many HW actions can be
 * parsed at most from the SW pedit action. On success, it holds how many HW
 * actions were actually parsed.
 */
static int offload_pedit_fields(struct pedit_headers *masks,
				struct pedit_headers *vals,
				struct mlx5e_tc_flow_parse_attr *parse_attr)
{
	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
	int i, action_size, nactions, max_actions, first, last, first_z;
	void *s_masks_p, *a_masks_p, *vals_p;
	struct mlx5_fields *f;
	u8 cmd, field_bsize;
	u32 s_mask, a_mask;
	unsigned long mask;
	void *action;

	set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET];
	add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD];
	set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET];
	add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD];

	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
	action = parse_attr->mod_hdr_actions;
	max_actions = parse_attr->num_mod_hdr_actions;
	nactions = 0;

	for (i = 0; i < ARRAY_SIZE(fields); i++) {
		f = &fields[i];
		/* avoid seeing bits set from previous iterations */
		s_mask = 0;
		a_mask = 0;

		s_masks_p = (void *)set_masks + f->offset;
		a_masks_p = (void *)add_masks + f->offset;

		memcpy(&s_mask, s_masks_p, f->size);
		memcpy(&a_mask, a_masks_p, f->size);

		if (!s_mask && !a_mask) /* nothing to offload here */
			continue;

		if (s_mask && a_mask) {
			printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
			return -EOPNOTSUPP;
		}

		if (nactions == max_actions) {
			printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
			return -EOPNOTSUPP;
		}

		if (s_mask) {
			cmd  = MLX5_ACTION_TYPE_SET;
			mask = s_mask;
			vals_p = (void *)set_vals + f->offset;
			/* clear to denote we consumed this field */
			memset(s_masks_p, 0, f->size);
		} else {
			cmd  = MLX5_ACTION_TYPE_ADD;
			mask = a_mask;
			vals_p = (void *)add_vals + f->offset;
			/* clear to denote we consumed this field */
			memset(a_masks_p, 0, f->size);
		}

		field_bsize = f->size * BITS_PER_BYTE;

		first_z = find_first_zero_bit(&mask, field_bsize);
		first = find_first_bit(&mask, field_bsize);
		last  = find_last_bit(&mask, field_bsize);
		if (first > 0 || last != (field_bsize - 1) || first_z < last) {
			printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n",
			       mask);
			return -EOPNOTSUPP;
		}

		MLX5_SET(set_action_in, action, action_type, cmd);
		MLX5_SET(set_action_in, action, field, f->field);

		if (cmd == MLX5_ACTION_TYPE_SET) {
			MLX5_SET(set_action_in, action, offset, 0);
			/* length is num of bits to be written, zero means length of 32 */
			MLX5_SET(set_action_in, action, length, field_bsize);
		}

		if (field_bsize == 32)
			MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p));
		else if (field_bsize == 16)
			MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p));
		else if (field_bsize == 8)
			MLX5_SET(set_action_in, action, data, *(u8 *)vals_p);

		action += action_size;
		nactions++;
	}

	parse_attr->num_mod_hdr_actions = nactions;

	return 0;
}
static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
				 const struct tc_action *a, int namespace,
				 struct mlx5e_tc_flow_parse_attr *parse_attr)
{
	int nkeys, action_size, max_actions;

	nkeys = tcf_pedit_nkeys(a);
	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);

	if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
		max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions);
	else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
		max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions);

	/* a single 32-bit pedit SW key can translate to as many as 16 HW actions */
	max_actions = min(max_actions, nkeys * 16);

	parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
	if (!parse_attr->mod_hdr_actions)
		return -ENOMEM;

	parse_attr->num_mod_hdr_actions = max_actions;
	return 0;
}
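
/* An illustrative tc command (not taken from this driver) whose pedit
 * and csum actions would be parsed by parse_tc_pedit_action() and
 * checked by csum_offload_supported() below; device names are
 * placeholders:
 *
 *   tc filter add dev eth0 protocol ip parent ffff: flower ip_proto tcp \
 *	action pedit ex munge ip ttl set 63 pipe \
 *	action csum iph pipe \
 *	action mirred egress redirect dev eth1
 */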
static const struct pedit_headers zero_masks = {};

static int parse_tc_pedit_action(struct mlx5e_priv *priv,
				 const struct tc_action *a, int namespace,
				 struct mlx5e_tc_flow_parse_attr *parse_attr)
{
	struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks;
	int nkeys, i, err = -EOPNOTSUPP;
	u32 mask, val, offset;
	u8 cmd, htype;

	nkeys = tcf_pedit_nkeys(a);

	memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
	memset(vals,  0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);

	for (i = 0; i < nkeys; i++) {
		htype = tcf_pedit_htype(a, i);
		cmd = tcf_pedit_cmd(a, i);
		err = -EOPNOTSUPP; /* can't be all optimistic */

		if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
			printk(KERN_WARNING "mlx5: legacy pedit isn't offloaded\n");
			goto out_err;
		}

		if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
			printk(KERN_WARNING "mlx5: pedit cmd %d isn't offloaded\n", cmd);
			goto out_err;
		}

		mask = tcf_pedit_mask(a, i);
		val = tcf_pedit_val(a, i);
		offset = tcf_pedit_offset(a, i);

		err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]);
		if (err)
			goto out_err;
	}

	err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr);
	if (err)
		goto out_err;

	err = offload_pedit_fields(masks, vals, parse_attr);
	if (err < 0)
		goto out_dealloc_parsed_actions;

	for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
		cmd_masks = &masks[cmd];
		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
			printk(KERN_WARNING "mlx5: attempt to offload an unsupported field (cmd %d)\n",
			       cmd);
			print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
				       16, 1, cmd_masks, sizeof(zero_masks), true);
			err = -EOPNOTSUPP;
			goto out_dealloc_parsed_actions;
		}
	}

	return 0;

out_dealloc_parsed_actions:
	kfree(parse_attr->mod_hdr_actions);
out_err:
	return err;
}
static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags)
{
	u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
			 TCA_CSUM_UPDATE_FLAG_UDP;

	/* the HW recalcs checksums only if re-writing headers */
	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
		netdev_warn(priv->netdev,
			    "TC csum action is only offloaded with pedit\n");
		return false;
	}

	if (update_flags & ~prot_flags) {
		netdev_warn(priv->netdev,
			    "can't offload TC csum action for some header/s - flags %#x\n",
			    update_flags);
		return false;
	}

	return true;
}
static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
				struct mlx5e_tc_flow_parse_attr *parse_attr,
				struct mlx5e_tc_flow *flow)
{
	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
	const struct tc_action *a;
	LIST_HEAD(actions);
	int err;

	if (tc_no_actions(exts))
		return -EINVAL;

	attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
	attr->action = 0;

	tcf_exts_to_list(exts, &actions);
	list_for_each_entry(a, &actions, list) {
		if (is_tcf_gact_shot(a)) {
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
			if (MLX5_CAP_FLOWTABLE(priv->mdev,
					       flow_table_properties_nic_receive.flow_counter))
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
			continue;
		}

		if (is_tcf_pedit(a)) {
			err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL,
						    parse_attr);
			if (err)
				return err;

			attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
			continue;
		}

		if (is_tcf_csum(a)) {
			if (csum_offload_supported(priv, attr->action,
						   tcf_csum_update_flags(a)))
				continue;

			return -EOPNOTSUPP;
		}

		if (is_tcf_skbedit_mark(a)) {
			u32 mark = tcf_skbedit_mark(a);

			if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
				netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n",
					    mark);
				return -EINVAL;
			}

			attr->flow_tag = mark;
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
			continue;
		}

		return -EINVAL;
	}

	return 0;
}
static inline int cmp_encap_info(struct ip_tunnel_key *a,
				 struct ip_tunnel_key *b)
{
	return memcmp(a, b, sizeof(*a));
}

static inline int hash_encap_info(struct ip_tunnel_key *key)
{
	return jhash(key, sizeof(*key), 0);
}
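
/* Route and neighbour resolution for the encap destination: if the
 * egress device found by the FIB lookup is not on the same HW e-switch
 * as this netdev, the uplink device is used instead.
 */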
static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
				   struct net_device *mirred_dev,
				   struct net_device **out_dev,
				   struct flowi4 *fl4,
				   struct neighbour **out_n,
				   int *out_ttl)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct rtable *rt;
	struct neighbour *n = NULL;

#if IS_ENABLED(CONFIG_INET)
	int ret;

	rt = ip_route_output_key(dev_net(mirred_dev), fl4);
	ret = PTR_ERR_OR_ZERO(rt);
	if (ret)
		return ret;
#else
	return -EOPNOTSUPP;
#endif
	/* if the egress device isn't on the same HW e-switch, we use the uplink */
	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
	else
		*out_dev = rt->dst.dev;

	*out_ttl = ip4_dst_hoplimit(&rt->dst);
	n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
	ip_rt_put(rt);
	if (!n)
		return -ENOMEM;

	*out_n = n;
	return 0;
}
static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
				   struct net_device *mirred_dev,
				   struct net_device **out_dev,
				   struct flowi6 *fl6,
				   struct neighbour **out_n,
				   int *out_ttl)
{
	struct neighbour *n = NULL;
	struct dst_entry *dst;

#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	int ret;

	dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6);
	ret = dst->error;
	if (ret) {
		dst_release(dst);
		return ret;
	}

	*out_ttl = ip6_dst_hoplimit(dst);

	/* if the egress device isn't on the same HW e-switch, we use the uplink */
	if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
	else
		*out_dev = dst->dev;
#else
	return -EOPNOTSUPP;
#endif

	n = dst_neigh_lookup(dst, &fl6->daddr);
	dst_release(dst);
	if (!n)
		return -ENOMEM;

	*out_n = n;
	return 0;
}
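
/* The two helpers below build the complete outer header
 * (Ethernet + IPv4/IPv6 + UDP + VXLAN) in a caller-provided buffer.
 * Fields such as the IP/UDP lengths are intentionally left zero; per
 * the in-code comment, the HW fills them in at transmission time.
 */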
static void gen_vxlan_header_ipv4(struct net_device *out_dev,
				  char buf[], int encap_size,
				  unsigned char h_dest[ETH_ALEN],
				  int ttl,
				  __be32 daddr,
				  __be32 saddr,
				  __be16 udp_dst_port,
				  __be32 vx_vni)
{
	struct ethhdr *eth = (struct ethhdr *)buf;
	struct iphdr  *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
	struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
	struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));

	memset(buf, 0, encap_size);

	ether_addr_copy(eth->h_dest, h_dest);
	ether_addr_copy(eth->h_source, out_dev->dev_addr);
	eth->h_proto = htons(ETH_P_IP);

	ip->daddr = daddr;
	ip->saddr = saddr;

	ip->ttl = ttl;
	ip->protocol = IPPROTO_UDP;
	ip->version = 0x4;
	ip->ihl = 0x5;

	udp->dest = udp_dst_port;
	vxh->vx_flags = VXLAN_HF_VNI;
	vxh->vx_vni = vxlan_vni_field(vx_vni);
}
static void gen_vxlan_header_ipv6(struct net_device *out_dev,
				  char buf[], int encap_size,
				  unsigned char h_dest[ETH_ALEN],
				  int ttl,
				  struct in6_addr *daddr,
				  struct in6_addr *saddr,
				  __be16 udp_dst_port,
				  __be32 vx_vni)
{
	struct ethhdr *eth = (struct ethhdr *)buf;
	struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
	struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
	struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));

	memset(buf, 0, encap_size);

	ether_addr_copy(eth->h_dest, h_dest);
	ether_addr_copy(eth->h_source, out_dev->dev_addr);
	eth->h_proto = htons(ETH_P_IPV6);

	ip6_flow_hdr(ip6h, 0, 0);
	/* the HW fills up the ipv6 payload len */
	ip6h->nexthdr = IPPROTO_UDP;
	ip6h->hop_limit = ttl;
	ip6h->daddr = *daddr;
	ip6h->saddr = *saddr;

	udp->dest = udp_dst_port;
	vxh->vx_flags = VXLAN_HF_VNI;
	vxh->vx_vni = vxlan_vni_field(vx_vni);
}
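
/* Resolve the route and neighbour for the tunnel destination and create
 * the encap context in HW. If the neighbour is not yet valid, the entry
 * is left cached and -EAGAIN is returned; the neigh-update event later
 * completes the offload (see mlx5e_tc_encap_flows_add above).
 */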
static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
					  struct net_device *mirred_dev,
					  struct mlx5e_encap_entry *e)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
	struct ip_tunnel_key *tun_key = &e->tun_info.key;
	struct net_device *out_dev;
	struct neighbour *n = NULL;
	struct flowi4 fl4 = {};
	char *encap_header;
	int err, ttl = 0;
	u8 nud_state;

	if (max_encap_size < ipv4_encap_size) {
		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
			       ipv4_encap_size, max_encap_size);
		return -EOPNOTSUPP;
	}

	encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
	if (!encap_header)
		return -ENOMEM;

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		fl4.flowi4_proto = IPPROTO_UDP;
		fl4.fl4_dport = tun_key->tp_dst;
		break;
	default:
		err = -EOPNOTSUPP;
		goto free_encap;
	}
	fl4.flowi4_tos = tun_key->tos;
	fl4.daddr = tun_key->u.ipv4.dst;
	fl4.saddr = tun_key->u.ipv4.src;

	err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev,
				      &fl4, &n, &ttl);
	if (err)
		goto free_encap;

	/* used by mlx5e_detach_encap to lookup the neigh hash table
	 * entry in the neigh hash table when a user deletes a rule
	 */
	e->m_neigh.dev = n->dev;
	e->m_neigh.family = n->ops->family;
	memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
	e->out_dev = out_dev;

	/* It's important to add the neigh to the hash table before checking
	 * its validity state, so that if we get a notification when the
	 * neigh changes validity state, we can find the relevant neigh in
	 * the hash table.
	 */
	err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
	if (err)
		goto free_encap;

	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	ether_addr_copy(e->h_dest, n->ha);
	read_unlock_bh(&n->lock);

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		gen_vxlan_header_ipv4(out_dev, encap_header,
				      ipv4_encap_size, e->h_dest, ttl,
				      fl4.daddr,
				      fl4.saddr, tun_key->tp_dst,
				      tunnel_id_to_key32(tun_key->tun_id));
		break;
	default:
		err = -EOPNOTSUPP;
		goto destroy_neigh_entry;
	}
	e->encap_size = ipv4_encap_size;
	e->encap_header = encap_header;

	if (!(nud_state & NUD_VALID)) {
		neigh_event_send(n, NULL);
		err = -EAGAIN;
		goto out;
	}

	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
			       ipv4_encap_size, encap_header, &e->encap_id);
	if (err)
		goto destroy_neigh_entry;

	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
	neigh_release(n);
	return err;

destroy_neigh_entry:
	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
free_encap:
	kfree(encap_header);
out:
	if (n)
		neigh_release(n);
	return err;
}
static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
					  struct net_device *mirred_dev,
					  struct mlx5e_encap_entry *e)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
	struct ip_tunnel_key *tun_key = &e->tun_info.key;
	struct net_device *out_dev;
	struct neighbour *n = NULL;
	struct flowi6 fl6 = {};
	char *encap_header;
	int err, ttl = 0;
	u8 nud_state;

	if (max_encap_size < ipv6_encap_size) {
		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
			       ipv6_encap_size, max_encap_size);
		return -EOPNOTSUPP;
	}

	encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
	if (!encap_header)
		return -ENOMEM;

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		fl6.flowi6_proto = IPPROTO_UDP;
		fl6.fl6_dport = tun_key->tp_dst;
		break;
	default:
		err = -EOPNOTSUPP;
		goto free_encap;
	}

	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
	fl6.daddr = tun_key->u.ipv6.dst;
	fl6.saddr = tun_key->u.ipv6.src;

	err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev,
				      &fl6, &n, &ttl);
	if (err)
		goto free_encap;

	/* used by mlx5e_detach_encap to lookup the neigh hash table
	 * entry in the neigh hash table when a user deletes a rule
	 */
	e->m_neigh.dev = n->dev;
	e->m_neigh.family = n->ops->family;
	memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
	e->out_dev = out_dev;

	/* It's important to add the neigh to the hash table before checking
	 * its validity state, so that if we get a notification when the
	 * neigh changes validity state, we can find the relevant neigh in
	 * the hash table.
	 */
	err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
	if (err)
		goto free_encap;

	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	ether_addr_copy(e->h_dest, n->ha);
	read_unlock_bh(&n->lock);

	switch (e->tunnel_type) {
	case MLX5_HEADER_TYPE_VXLAN:
		gen_vxlan_header_ipv6(out_dev, encap_header,
				      ipv6_encap_size, e->h_dest, ttl,
				      &fl6.daddr,
				      &fl6.saddr, tun_key->tp_dst,
				      tunnel_id_to_key32(tun_key->tun_id));
		break;
	default:
		err = -EOPNOTSUPP;
		goto destroy_neigh_entry;
	}

	e->encap_size = ipv6_encap_size;
	e->encap_header = encap_header;

	if (!(nud_state & NUD_VALID)) {
		neigh_event_send(n, NULL);
		err = -EAGAIN;
		goto out;
	}

	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
			       ipv6_encap_size, encap_header, &e->encap_id);
	if (err)
		goto destroy_neigh_entry;

	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
	neigh_release(n);
	return err;

destroy_neigh_entry:
	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
free_encap:
	kfree(encap_header);
out:
	if (n)
		neigh_release(n);
	return err;
}
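
/* Find or create the encap entry matching this tunnel key. Entries are
 * kept in the eswitch encap hash table so that flows with identical
 * tunnel parameters share a single HW encap context.
 */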
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
			      struct ip_tunnel_info *tun_info,
			      struct net_device *mirred_dev,
			      struct net_device **encap_dev,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
	unsigned short family = ip_tunnel_info_af(tun_info);
	struct mlx5e_priv *up_priv = netdev_priv(up_dev);
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct ip_tunnel_key *key = &tun_info->key;
	struct mlx5e_encap_entry *e;
	int tunnel_type, err = 0;
	uintptr_t hash_key;
	bool found = false;

	/* udp dst port must be set */
	if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
		goto vxlan_encap_offload_err;

	/* setting udp src port isn't supported */
	if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
vxlan_encap_offload_err:
		netdev_warn(priv->netdev,
			    "must set udp dst port and not set udp src port\n");
		return -EOPNOTSUPP;
	}

	if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->tp_dst)) &&
	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
		tunnel_type = MLX5_HEADER_TYPE_VXLAN;
	} else {
		netdev_warn(priv->netdev,
			    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(key);

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		if (!cmp_encap_info(&e->tun_info.key, key)) {
			found = true;
			break;
		}
	}

	/* must verify if encap is valid or not */
	if (found)
		goto attach_flow;

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	e->tun_info = *tun_info;
	e->tunnel_type = tunnel_type;
	INIT_LIST_HEAD(&e->flows);

	if (family == AF_INET)
		err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e);

	if (err && err != -EAGAIN)
		goto out_err;

	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);

attach_flow:
	list_add(&flow->encap, &e->flows);
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID)
		attr->encap_id = e->encap_id;

	return err;

out_err:
	kfree(e);
	return err;
}
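
/* An illustrative encap rule (not taken from this driver) that would be
 * handled by the tunnel_key and mirred branches of
 * parse_tc_fdb_actions() below; device names are placeholders:
 *
 *   tc filter add dev mlx5_rep0 protocol ip parent ffff: flower \
 *	action tunnel_key set src_ip 10.0.0.1 dst_ip 10.0.0.2 \
 *	    id 42 dst_port 4789 \
 *	action mirred egress redirect dev vxlan0
 */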
static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
				struct mlx5e_tc_flow_parse_attr *parse_attr,
				struct mlx5e_tc_flow *flow)
{
	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct ip_tunnel_info *info = NULL;
	const struct tc_action *a;
	LIST_HEAD(actions);
	bool encap = false;
	int err = 0;

	if (tc_no_actions(exts))
		return -EINVAL;

	memset(attr, 0, sizeof(*attr));
	attr->in_rep = rpriv->rep;

	tcf_exts_to_list(exts, &actions);
	list_for_each_entry(a, &actions, list) {
		if (is_tcf_gact_shot(a)) {
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
					MLX5_FLOW_CONTEXT_ACTION_COUNT;
			continue;
		}

		if (is_tcf_pedit(a)) {
			err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
						    parse_attr);
			if (err)
				return err;

			attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
			continue;
		}

		if (is_tcf_csum(a)) {
			if (csum_offload_supported(priv, attr->action,
						   tcf_csum_update_flags(a)))
				continue;

			return -EOPNOTSUPP;
		}

		if (is_tcf_mirred_egress_redirect(a)) {
			int ifindex = tcf_mirred_ifindex(a);
			struct net_device *out_dev, *encap_dev = NULL;
			struct mlx5e_priv *out_priv;

			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);

			if (switchdev_port_same_parent_id(priv->netdev,
							  out_dev)) {
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
					MLX5_FLOW_CONTEXT_ACTION_COUNT;
				out_priv = netdev_priv(out_dev);
				rpriv = out_priv->ppriv;
				attr->out_rep = rpriv->rep;
			} else if (encap) {
				err = mlx5e_attach_encap(priv, info,
							 out_dev, &encap_dev, flow);
				if (err && err != -EAGAIN)
					return err;
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
					MLX5_FLOW_CONTEXT_ACTION_COUNT;
				out_priv = netdev_priv(encap_dev);
				rpriv = out_priv->ppriv;
				attr->out_rep = rpriv->rep;
				attr->parse_attr = parse_attr;
			} else {
				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
				       priv->netdev->name, out_dev->name);
				return -EINVAL;
			}
			continue;
		}

		if (is_tcf_tunnel_set(a)) {
			info = tcf_tunnel_info(a);
			if (info)
				encap = true;
			else
				return -EOPNOTSUPP;
			continue;
		}

		if (is_tcf_vlan(a)) {
			if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
			} else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
				if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
					return -EOPNOTSUPP;

				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
				attr->vlan = tcf_vlan_push_vid(a);
			} else { /* action is TCA_VLAN_ACT_MODIFY */
				return -EOPNOTSUPP;
			}
			continue;
		}

		if (is_tcf_tunnel_release(a)) {
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
			continue;
		}

		return -EINVAL;
	}
	return err;
}
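
/* Entry points called from the driver's cls_flower offload hooks. An
 * illustrative NIC-mode rule (not taken from this driver) that would
 * reach mlx5e_configure_flower(); the device name is a placeholder:
 *
 *   tc filter add dev eth0 protocol ip parent ffff: flower \
 *	ip_proto tcp dst_port 80 action drop
 */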
int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
			   struct tc_cls_flower_offload *f)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	struct mlx5e_tc_flow *flow;
	int attr_size, err = 0;
	u8 flow_flags = 0;

	if (esw && esw->mode == SRIOV_OFFLOADS) {
		flow_flags = MLX5E_TC_FLOW_ESWITCH;
		attr_size  = sizeof(struct mlx5_esw_flow_attr);
	} else {
		flow_flags = MLX5E_TC_FLOW_NIC;
		attr_size  = sizeof(struct mlx5_nic_flow_attr);
	}

	flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
	if (!parse_attr || !flow) {
		err = -ENOMEM;
		goto err_free;
	}

	flow->cookie = f->cookie;
	flow->flags = flow_flags;

	err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
	if (err < 0)
		goto err_free;

	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
		err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
		if (err < 0)
			goto err_handle_encap_flow;
		flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
	} else {
		err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
		if (err < 0)
			goto err_free;
		flow->rule = mlx5e_tc_add_nic_flow(priv, parse_attr, flow);
	}

	if (IS_ERR(flow->rule)) {
		err = PTR_ERR(flow->rule);
		goto err_free;
	}

	flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
	err = rhashtable_insert_fast(&tc->ht, &flow->node,
				     tc->ht_params);
	if (err)
		goto err_del_rule;

	if (flow->flags & MLX5E_TC_FLOW_ESWITCH &&
	    !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
		kvfree(parse_attr);
	return err;

err_del_rule:
	mlx5e_tc_del_flow(priv, flow);

err_handle_encap_flow:
	if (err == -EAGAIN) {
		err = rhashtable_insert_fast(&tc->ht, &flow->node,
					     tc->ht_params);
		if (err)
			mlx5e_tc_del_flow(priv, flow);
		else
			return 0;
	}

err_free:
	kvfree(parse_attr);
	kfree(flow);
	return err;
}
int mlx5e_delete_flower(struct mlx5e_priv *priv,
			struct tc_cls_flower_offload *f)
{
	struct mlx5e_tc_flow *flow;
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
				      tc->ht_params);
	if (!flow)
		return -EINVAL;

	rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);

	mlx5e_tc_del_flow(priv, flow);

	kfree(flow);

	return 0;
}
int mlx5e_stats_flower(struct mlx5e_priv *priv,
		       struct tc_cls_flower_offload *f)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	u64 bytes;
	u64 packets;
	u64 lastuse;

	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
				      tc->ht_params);
	if (!flow)
		return -EINVAL;

	if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED))
		return 0;

	counter = mlx5_flow_rule_counter(flow->rule);
	if (!counter)
		return 0;

	mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);

	tcf_exts_stats_update(f->exts, bytes, packets, lastuse);

	return 0;
}
static const struct rhashtable_params mlx5e_tc_flow_ht_params = {
	.head_offset = offsetof(struct mlx5e_tc_flow, node),
	.key_offset = offsetof(struct mlx5e_tc_flow, cookie),
	.key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
	.automatic_shrinking = true,
};

int mlx5e_tc_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	tc->ht_params = mlx5e_tc_flow_ht_params;
	return rhashtable_init(&tc->ht, &tc->ht_params);
}
static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = arg;

	mlx5e_tc_del_flow(priv, flow);
	kfree(flow);
}

void mlx5e_tc_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv);

	if (!IS_ERR_OR_NULL(tc->t)) {
		mlx5_destroy_flow_table(tc->t);
		tc->t = NULL;
	}
}