net/mlx5e: Support offloading TC NIC hairpin flows
author Or Gerlitz <ogerlitz@mellanox.com>
Wed, 22 Nov 2017 19:09:05 +0000 (21:09 +0200)
committer Saeed Mahameed <saeedm@mellanox.com>
Tue, 9 Jan 2018 05:40:48 +0000 (07:40 +0200)
We refer to a TC NIC rule that involves forwarding as "hairpin".

All hairpin rules from the current NIC device (called "func" in
the code) to a given NIC device ("peer") are steered into the
same hairpin RQ/SQ pair.

The hairpin pair is set on demand and removed when there are no
TC rules that need it.

Here's a TC rule that matches on icmp, does header re-write of the
dst mac and hairpin from RX/enp1s2f1 to TX/enp1s2f2 (enp1s2f1/2 are
two mlx5 devices):

tc filter add dev enp1s2f1 protocol ip parent ffff: prio 2 \
    flower skip_sw ip_proto icmp \
     action pedit ex munge eth dst set 10:22:33:44:55:66 pipe \
     action mirred egress redirect dev enp1s2f2

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c

index 5299310f2481d9a276a559cf32b58046d557f880..72bab8d3f4b03110e8ea87ecf4f0279fed05b947 100644 (file)
@@ -659,6 +659,7 @@ struct mlx5e_tc_table {
        struct rhashtable               ht;
 
        DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+       DECLARE_HASHTABLE(hairpin_tbl, 8);
 };
 
 struct mlx5e_vlan_table {
index 55a527bda2e5adec40a361819f27d944e096f881..cf528da51243da928bdfa7b4ae590f5805aa76de 100644 (file)
@@ -56,12 +56,14 @@ struct mlx5_nic_flow_attr {
        u32 action;
        u32 flow_tag;
        u32 mod_hdr_id;
+       u32 hairpin_tirn;
 };
 
 enum {
        MLX5E_TC_FLOW_ESWITCH   = BIT(0),
        MLX5E_TC_FLOW_NIC       = BIT(1),
        MLX5E_TC_FLOW_OFFLOADED = BIT(2),
+       MLX5E_TC_FLOW_HAIRPIN   = BIT(3), /* NIC flow redirecting to a peer netdev on the same HW */
 };
 
 struct mlx5e_tc_flow {
@@ -71,6 +73,7 @@ struct mlx5e_tc_flow {
        struct mlx5_flow_handle *rule;
        struct list_head        encap;   /* flows sharing the same encap ID */
        struct list_head        mod_hdr; /* flows sharing the same mod hdr ID */
+       struct list_head        hairpin; /* flows sharing the same hairpin */
        union {
                struct mlx5_esw_flow_attr esw_attr[0];
                struct mlx5_nic_flow_attr nic_attr[0];
@@ -101,6 +104,17 @@ struct mlx5e_hairpin {
        u32 tirn;
 };
 
+struct mlx5e_hairpin_entry {
+       /* node in the priv->fs.tc.hairpin_tbl hash table, which keeps all the hairpin entries */
+       struct hlist_node hairpin_hlist;
+
+       /* flows sharing the same hairpin, linked through flow->hairpin */
+       struct list_head flows;
+
+       int peer_ifindex;         /* hash key: ifindex of the peer netdev */
+       struct mlx5e_hairpin *hp; /* hairpin object whose tirn the attached flows are steered to */
+};
+
 struct mod_hdr_key {
        int num_actions;
        void *actions;
@@ -319,6 +333,98 @@ static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
        kvfree(hp);
 }
 
+static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
+                                                    int peer_ifindex)
+{      /* look up the hairpin entry keyed by @peer_ifindex; returns NULL when there is none */
+       struct mlx5e_hairpin_entry *hpe;
+
+       hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
+                              hairpin_hlist, peer_ifindex) {
+               if (hpe->peer_ifindex == peer_ifindex) /* a bucket may hold other keys too */
+                       return hpe;
+       }
+
+       return NULL;
+}
+
+static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
+                                 struct mlx5e_tc_flow *flow,
+                                 struct mlx5e_tc_flow_parse_attr *parse_attr)
+{      /* attach @flow to the hairpin towards parse_attr->mirred_ifindex, creating it on first use; returns 0 or -errno */
+       int peer_ifindex = parse_attr->mirred_ifindex;
+       struct mlx5_hairpin_params params; /* NOTE(review): not zero-initialized, only two fields are set below - confirm the struct has no others */
+       struct mlx5e_hairpin_entry *hpe;
+       struct mlx5e_hairpin *hp;
+       int err;
+
+       if (!MLX5_CAP_GEN(priv->mdev, hairpin)) {
+               netdev_warn(priv->netdev, "hairpin is not supported\n");
+               return -EOPNOTSUPP;
+       }
+
+       hpe = mlx5e_hairpin_get(priv, peer_ifindex);
+       if (hpe)
+               goto attach_flow; /* a hairpin to this peer already exists, share it */
+
+       hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
+       if (!hpe)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&hpe->flows);
+       hpe->peer_ifindex = peer_ifindex;
+
+       params.log_data_size = 15; /* starting value, clamped to the device caps below (the minimum is applied last) */
+       params.log_data_size = min_t(u8, params.log_data_size,
+                                    MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
+       params.log_data_size = max_t(u8, params.log_data_size,
+                                    MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
+       params.q_counter = priv->q_counter;
+
+       hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
+       if (IS_ERR(hp)) {
+               err = PTR_ERR(hp);
+               goto create_hairpin_err;
+       }
+
+       netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x log data size %d\n",
+                  hp->tirn, hp->pair->rqn, hp->pair->peer_mdev->priv.name,
+                  hp->pair->sqn, params.log_data_size);
+
+       hpe->hp = hp;
+       hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, peer_ifindex); /* publish so later flows to this peer find it */
+
+attach_flow:
+       flow->nic_attr->hairpin_tirn = hpe->hp->tirn; /* read by mlx5e_tc_add_nic_flow() as the rule's TIR destination */
+       list_add(&flow->hairpin, &hpe->flows);
+       return 0;
+
+create_hairpin_err:
+       kfree(hpe); /* the entry was never added to the hash table */
+       return err;
+}
+
+static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
+                                  struct mlx5e_tc_flow *flow)
+{      /* detach @flow from its hairpin entry and release the hairpin if no flows are left on it */
+       struct list_head *next = flow->hairpin.next; /* saved before list_del(); equals &hpe->flows when @flow is the only one */
+
+       list_del(&flow->hairpin);
+
+       /* no more hairpin flows for us, release the hairpin pair */
+       if (list_empty(next)) {
+               struct mlx5e_hairpin_entry *hpe;
+
+               hpe = list_entry(next, struct mlx5e_hairpin_entry, flows); /* next is the entry's list head here */
+
+               netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
+                          hpe->hp->pair->peer_mdev->priv.name);
+
+               mlx5e_hairpin_destroy(hpe->hp);
+               hash_del(&hpe->hairpin_hlist);
+               kfree(hpe);
+       }
+}
+
 static struct mlx5_flow_handle *
 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow_parse_attr *parse_attr,
@@ -326,7 +432,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 {
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        struct mlx5_core_dev *dev = priv->mdev;
-       struct mlx5_flow_destination dest = {};
+       struct mlx5_flow_destination dest[2] = {};
        struct mlx5_flow_act flow_act = {
                .action = attr->action,
                .flow_tag = attr->flow_tag,
@@ -335,18 +441,33 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
        struct mlx5_fc *counter = NULL;
        struct mlx5_flow_handle *rule;
        bool table_created = false;
-       int err;
+       int err, dest_ix = 0;
 
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
-               dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-               dest.ft = priv->fs.vlan.ft.t;
-       } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
-               counter = mlx5_fc_create(dev, true);
-               if (IS_ERR(counter))
-                       return ERR_CAST(counter);
+               if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
+                       err = mlx5e_hairpin_flow_add(priv, flow, parse_attr);
+                       if (err) {
+                               rule = ERR_PTR(err);
+                               goto err_add_hairpin_flow;
+                       }
+                       dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+                       dest[dest_ix].tir_num = attr->hairpin_tirn;
+               } else {
+                       dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+                       dest[dest_ix].ft = priv->fs.vlan.ft.t;
+               }
+               dest_ix++;
+       }
 
-               dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-               dest.counter = counter;
+       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+               counter = mlx5_fc_create(dev, true);
+               if (IS_ERR(counter)) {
+                       rule = ERR_CAST(counter);
+                       goto err_fc_create;
+               }
+               dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+               dest[dest_ix].counter = counter;
+               dest_ix++;
        }
 
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
@@ -389,7 +510,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 
        parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
        rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
-                                  &flow_act, &dest, 1);
+                                  &flow_act, dest, dest_ix);
 
        if (IS_ERR(rule))
                goto err_add_rule;
@@ -406,7 +527,10 @@ err_create_ft:
                mlx5e_detach_mod_hdr(priv, flow);
 err_create_mod_hdr_id:
        mlx5_fc_destroy(dev, counter);
-
+err_fc_create:
+       if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
+               mlx5e_hairpin_flow_del(priv, flow);
+err_add_hairpin_flow:
        return rule;
 }
 
@@ -427,6 +551,9 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
 
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);
+
+       if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
+               mlx5e_hairpin_flow_del(priv, flow);
 }
 
 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
@@ -1519,6 +1646,20 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
        return true;
 }
 
+static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
+{      /* true when the PCI devices behind @priv and @peer_priv have the same bus number and slot */
+       struct mlx5_core_dev *fmdev, *pmdev;
+       u16 func_id, peer_id; /* (bus number << 8) | PCI slot; the PCI function number is not included */
+
+       fmdev = priv->mdev;
+       pmdev = peer_priv->mdev;
+
+       func_id = (u16)((fmdev->pdev->bus->number << 8) | PCI_SLOT(fmdev->pdev->devfn));
+       peer_id = (u16)((pmdev->pdev->bus->number << 8) | PCI_SLOT(pmdev->pdev->devfn));
+
+       return (func_id == peer_id); /* NOTE(review): the PCI domain is not compared - confirm that is intended */
+}
+
 static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                                struct mlx5e_tc_flow_parse_attr *parse_attr,
                                struct mlx5e_tc_flow *flow)
@@ -1563,6 +1704,23 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                        return -EOPNOTSUPP;
                }
 
+               if (is_tcf_mirred_egress_redirect(a)) {
+                       struct net_device *peer_dev = tcf_mirred_dev(a);
+
+                       if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
+                           same_hw_devs(priv, netdev_priv(peer_dev))) {
+                               parse_attr->mirred_ifindex = peer_dev->ifindex;
+                               flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
+                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+                                               MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                       } else {
+                               netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
+                                           peer_dev->name);
+                               return -EINVAL;
+                       }
+                       continue;
+               }
+
                if (is_tcf_skbedit_mark(a)) {
                        u32 mark = tcf_skbedit_mark(a);
 
@@ -2285,6 +2443,7 @@ int mlx5e_tc_init(struct mlx5e_priv *priv)
        struct mlx5e_tc_table *tc = &priv->fs.tc;
 
        hash_init(tc->mod_hdr_tbl);
+       hash_init(tc->hairpin_tbl);
 
        tc->ht_params = mlx5e_tc_flow_ht_params;
        return rhashtable_init(&tc->ht, &tc->ht_params);