net/mlx5e: Support SRIOV TC encapsulation offloads for IPv6 tunnels

[sfrench/cifs-2.6.git] / drivers / net / ethernet / mellanox / mlx5 / core / en_tc.c
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c

index f8829b5171560ed2f51cd05b8c82d2a076ce8cbb..640f10f2e994f462f260ae46a14f5b2add0ce15e 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -161,15 +161,21 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
         }
  }
  
+/* This is also reached on error paths, e.g. when installing the rule in the
+ * FW failed. In that case the flow rule itself might not exist, but any
+ * offload state set up for its actions must still be cleaned up.
+ */
  static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                               struct mlx5e_tc_flow *flow)
  {
         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
         struct mlx5_fc *counter = NULL;
  
-       counter = mlx5_flow_rule_counter(flow->rule);
-
-       mlx5_del_flow_rules(flow->rule);
+       if (!IS_ERR(flow->rule)) {
+               counter = mlx5_flow_rule_counter(flow->rule);
+               mlx5_del_flow_rules(flow->rule);
+               mlx5_fc_destroy(priv->mdev, counter);
+       }
  
         if (esw && esw->mode == SRIOV_OFFLOADS) {
                 mlx5_eswitch_del_vlan_action(esw, flow->attr);
@@ -177,8 +183,6 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                         mlx5e_detach_encap(priv, flow);
         }
  
-       mlx5_fc_destroy(priv->mdev, counter);
-
         if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
                 mlx5_destroy_flow_table(priv->fs.tc.t);
                 priv->fs.tc.t = NULL;
@@ -225,6 +229,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
         void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                        outer_headers);
  
+       struct flow_dissector_key_control *enc_control =
+               skb_flow_dissector_target(f->dissector,
+                                         FLOW_DISSECTOR_KEY_ENC_CONTROL,
+                                         f->key);
+
         if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
                 struct flow_dissector_key_ports *key =
                         skb_flow_dissector_target(f->dissector,
@@ -237,28 +246,34 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
  
                 /* Full udp dst port must be given */
                 if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
-                       return -EOPNOTSUPP;
-
-               /* udp src port isn't supported */
-               if (memchr_inv(&mask->src, 0, sizeof(mask->src)))
-                       return -EOPNOTSUPP;
+                       goto vxlan_match_offload_err;
  
                 if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
                     MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
                         parse_vxlan_attr(spec, f);
-               else
+               else {
+                       netdev_warn(priv->netdev,
+                                   "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
                         return -EOPNOTSUPP;
+               }
  
                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                          udp_dport, ntohs(mask->dst));
                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                          udp_dport, ntohs(key->dst));
  
+               MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+                        udp_sport, ntohs(mask->src));
+               MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+                        udp_sport, ntohs(key->src));
         } else { /* udp dst port must be given */
-                       return -EOPNOTSUPP;
+vxlan_match_offload_err:
+               netdev_warn(priv->netdev,
+                           "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
+               return -EOPNOTSUPP;
         }
  
-       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+       if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                 struct flow_dissector_key_ipv4_addrs *key =
                         skb_flow_dissector_target(f->dissector,
                                                   FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
@@ -280,10 +295,36 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                          dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
                          ntohl(key->dst));
-       }
  
-       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
-       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
+               MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
+       } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+               struct flow_dissector_key_ipv6_addrs *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+                                                 f->key);
+               struct flow_dissector_key_ipv6_addrs *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+                                                 f->mask);
+
+               memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+                                   src_ipv4_src_ipv6.ipv6_layout.ipv6),
+                      &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+               memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+                                   src_ipv4_src_ipv6.ipv6_layout.ipv6),
+                      &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+
+               memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+                                   dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+                      &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+               memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+                                   dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+                      &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+
+               MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
+       }
  
         /* Enforce DMAC when offloading incoming tunneled flows.
          * Flow counters require a match on the DMAC.
@@ -343,6 +384,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                                                   f->key);
                 switch (key->addr_type) {
                 case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+               case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
                         if (parse_tunnel_attr(priv, spec, f))
                                 return -EOPNOTSUPP;
                         break;
@@ -375,6 +417,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
                                  key->flags & FLOW_DIS_IS_FRAGMENT);
+
+                       /* the HW doesn't need L3 inline to match on frag=no */
+                       if (key->flags & FLOW_DIS_IS_FRAGMENT)
+                               *min_inline = MLX5_INLINE_MODE_IP;
                 }
         }
  
@@ -438,8 +484,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                                                   FLOW_DISSECTOR_KEY_VLAN,
                                                   f->mask);
                 if (mask->vlan_id || mask->vlan_priority) {
-                       MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1);
-                       MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
  
                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
@@ -622,15 +668,15 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
         return 0;
  }
  
-static inline int cmp_encap_info(struct mlx5_encap_info *a,
-                                struct mlx5_encap_info *b)
+static inline int cmp_encap_info(struct ip_tunnel_key *a,
+                                struct ip_tunnel_key *b)
  {
         return memcmp(a, b, sizeof(*a));
  }
  
-static inline int hash_encap_info(struct mlx5_encap_info *info)
+static inline int hash_encap_info(struct ip_tunnel_key *key)
  {
-       return jhash(info, sizeof(*info), 0);
+       return jhash(key, sizeof(*key), 0);
  }
  
  static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
@@ -638,44 +684,81 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
                                    struct net_device **out_dev,
                                    struct flowi4 *fl4,
                                    struct neighbour **out_n,
-                                  __be32 *saddr,
                                    int *out_ttl)
  {
         struct rtable *rt;
         struct neighbour *n = NULL;
-       int ttl;
  
  #if IS_ENABLED(CONFIG_INET)
+       int ret;
+
         rt = ip_route_output_key(dev_net(mirred_dev), fl4);
-       if (IS_ERR(rt)) {
-               pr_warn("%s: no route to %pI4\n", __func__, &fl4->daddr);
-               return -EOPNOTSUPP;
-       }
+       ret = PTR_ERR_OR_ZERO(rt);
+       if (ret)
+               return ret;
  #else
         return -EOPNOTSUPP;
  #endif
  
         if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) {
-               pr_warn("%s: Can't offload the flow, netdevices aren't on the same HW e-switch\n",
-                       __func__);
+               pr_warn("%s: can't offload, devices not on same HW e-switch\n", __func__);
                 ip_rt_put(rt);
                 return -EOPNOTSUPP;
         }
  
-       ttl = ip4_dst_hoplimit(&rt->dst);
+       *out_ttl = ip4_dst_hoplimit(&rt->dst);
         n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
         ip_rt_put(rt);
         if (!n)
                 return -ENOMEM;
  
         *out_n = n;
-       *saddr = fl4->saddr;
-       *out_ttl = ttl;
         *out_dev = rt->dst.dev;
  
         return 0;
  }
  
+static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
+                                  struct net_device *mirred_dev,
+                                  struct net_device **out_dev,
+                                  struct flowi6 *fl6,
+                                  struct neighbour **out_n,
+                                  int *out_ttl)
+{
+       struct neighbour *n = NULL;
+       struct dst_entry *dst;
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       int ret;
+
+       dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6);
+       if (dst->error) {
+               ret = dst->error;
+               dst_release(dst);
+               return ret;
+       }
+
+       *out_ttl = ip6_dst_hoplimit(dst);
+
+       /* if the egress device isn't on the same HW e-switch, we use the uplink */
+       if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
+               *out_dev = mlx5_eswitch_get_uplink_netdev(esw);
+       else
+               *out_dev = dst->dev;
+#else
+       return -EOPNOTSUPP;
+#endif
+
+       n = dst_neigh_lookup(dst, &fl6->daddr);
+       dst_release(dst);
+       if (!n)
+               return -ENOMEM;
+
+       *out_n = n;
+       return 0;
+}
+
  static int gen_vxlan_header_ipv4(struct net_device *out_dev,
                                  char buf[],
                                  unsigned char h_dest[ETH_ALEN],
@@ -712,19 +795,52 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev,
         return encap_size;
  }
  
+static int gen_vxlan_header_ipv6(struct net_device *out_dev,
+                                char buf[],
+                                unsigned char h_dest[ETH_ALEN],
+                                int ttl,
+                                struct in6_addr *daddr,
+                                struct in6_addr *saddr,
+                                __be16 udp_dst_port,
+                                __be32 vx_vni)
+{
+       int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN;
+       struct ethhdr *eth = (struct ethhdr *)buf;
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
+       struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
+       struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+
+       memset(buf, 0, encap_size);
+
+       ether_addr_copy(eth->h_dest, h_dest);
+       ether_addr_copy(eth->h_source, out_dev->dev_addr);
+       eth->h_proto = htons(ETH_P_IPV6);
+
+       ip6_flow_hdr(ip6h, 0, 0);
+       /* the HW fills up ipv6 payload len */
+       ip6h->nexthdr     = IPPROTO_UDP;
+       ip6h->hop_limit   = ttl;
+       ip6h->daddr       = *daddr;
+       ip6h->saddr       = *saddr;
+
+       udp->dest = udp_dst_port;
+       vxh->vx_flags = VXLAN_HF_VNI;
+       vxh->vx_vni = vxlan_vni_field(vx_vni);
+
+       return encap_size;
+}
+
  static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
                                           struct net_device *mirred_dev,
                                           struct mlx5_encap_entry *e,
                                           struct net_device **out_dev)
  {
         int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+       struct ip_tunnel_key *tun_key = &e->tun_info.key;
+       int encap_size, ttl, err;
+       struct neighbour *n = NULL;
         struct flowi4 fl4 = {};
-       struct neighbour *n;
         char *encap_header;
-       int encap_size;
-       __be32 saddr;
-       int ttl;
-       int err;
  
         encap_header = kzalloc(max_encap_size, GFP_KERNEL);
         if (!encap_header)
@@ -733,36 +849,108 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
         switch (e->tunnel_type) {
         case MLX5_HEADER_TYPE_VXLAN:
                 fl4.flowi4_proto = IPPROTO_UDP;
-               fl4.fl4_dport = e->tun_info.tp_dst;
+               fl4.fl4_dport = tun_key->tp_dst;
                 break;
         default:
                 err = -EOPNOTSUPP;
                 goto out;
         }
-       fl4.daddr = e->tun_info.daddr;
+       fl4.flowi4_tos = tun_key->tos;
+       fl4.daddr = tun_key->u.ipv4.dst;
+       fl4.saddr = tun_key->u.ipv4.src;
  
         err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev,
-                                     &fl4, &n, &saddr, &ttl);
+                                     &fl4, &n, &ttl);
         if (err)
                 goto out;
  
+       if (!(n->nud_state & NUD_VALID)) {
+               pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr);
+               err = -EOPNOTSUPP;
+               goto out;
+       }
+
         e->n = n;
         e->out_dev = *out_dev;
  
+       neigh_ha_snapshot(e->h_dest, n, *out_dev);
+
+       switch (e->tunnel_type) {
+       case MLX5_HEADER_TYPE_VXLAN:
+               encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
+                                                  e->h_dest, ttl,
+                                                  fl4.daddr,
+                                                  fl4.saddr, tun_key->tp_dst,
+                                                  tunnel_id_to_key32(tun_key->tun_id));
+               break;
+       default:
+               err = -EOPNOTSUPP;
+               goto out;
+       }
+
+       err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
+                              encap_size, encap_header, &e->encap_id);
+out:
+       if (err && n)
+               neigh_release(n);
+       kfree(encap_header);
+       return err;
+}
+
+static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
+                                         struct net_device *mirred_dev,
+                                         struct mlx5_encap_entry *e,
+                                         struct net_device **out_dev)
+
+{
+       int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+       struct ip_tunnel_key *tun_key = &e->tun_info.key;
+       int encap_size, err, ttl = 0;
+       struct neighbour *n = NULL;
+       struct flowi6 fl6 = {};
+       char *encap_header;
+
+       encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+       if (!encap_header)
+               return -ENOMEM;
+
+       switch (e->tunnel_type) {
+       case MLX5_HEADER_TYPE_VXLAN:
+               fl6.flowi6_proto = IPPROTO_UDP;
+               fl6.fl6_dport = tun_key->tp_dst;
+               break;
+       default:
+               err = -EOPNOTSUPP;
+               goto out;
+       }
+
+       fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+       fl6.daddr = tun_key->u.ipv6.dst;
+       fl6.saddr = tun_key->u.ipv6.src;
+
+       err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev,
+                                     &fl6, &n, &ttl);
+       if (err)
+               goto out;
+
         if (!(n->nud_state & NUD_VALID)) {
-               err = -ENOTSUPP;
+               pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr);
+               err = -EOPNOTSUPP;
                 goto out;
         }
  
+       e->n = n;
+       e->out_dev = *out_dev;
+
         neigh_ha_snapshot(e->h_dest, n, *out_dev);
  
         switch (e->tunnel_type) {
         case MLX5_HEADER_TYPE_VXLAN:
-               encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
+               encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header,
                                                    e->h_dest, ttl,
-                                                  e->tun_info.daddr,
-                                                  saddr, e->tun_info.tp_dst,
-                                                  e->tun_info.tun_id);
+                                                  &fl6.daddr,
+                                                  &fl6.saddr, tun_key->tp_dst,
+                                                  tunnel_id_to_key32(tun_key->tun_id));
                 break;
         default:
                 err = -EOPNOTSUPP;
@@ -772,6 +960,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
         err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
                                encap_size, encap_header, &e->encap_id);
  out:
+       if (err && n)
+               neigh_release(n);
         kfree(encap_header);
         return err;
  }
@@ -784,40 +974,38 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
         unsigned short family = ip_tunnel_info_af(tun_info);
         struct ip_tunnel_key *key = &tun_info->key;
-       struct mlx5_encap_info info;
         struct mlx5_encap_entry *e;
         struct net_device *out_dev;
+       int tunnel_type, err = -EOPNOTSUPP;
         uintptr_t hash_key;
         bool found = false;
-       int tunnel_type;
-       int err;
  
-       /* udp dst port must be given */
+       /* udp dst port must be set */
         if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
+               goto vxlan_encap_offload_err;
+
+       /* setting udp src port isn't supported */
+       if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
+vxlan_encap_offload_err:
+               netdev_warn(priv->netdev,
+                           "must set udp dst port and not set udp src port\n");
                 return -EOPNOTSUPP;
+       }
  
         if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
             MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
-               info.tp_dst = key->tp_dst;
-               info.tun_id = tunnel_id_to_key32(key->tun_id);
                 tunnel_type = MLX5_HEADER_TYPE_VXLAN;
         } else {
+               netdev_warn(priv->netdev,
+                           "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
                 return -EOPNOTSUPP;
         }
  
-       switch (family) {
-       case AF_INET:
-               info.daddr = key->u.ipv4.dst;
-               break;
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       hash_key = hash_encap_info(&info);
+       hash_key = hash_encap_info(key);
  
         hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
                                    encap_hlist, hash_key) {
-               if (!cmp_encap_info(&e->tun_info, &info)) {
+               if (!cmp_encap_info(&e->tun_info.key, key)) {
                         found = true;
                         break;
                 }
@@ -832,11 +1020,15 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
         if (!e)
                 return -ENOMEM;
  
-       e->tun_info = info;
+       e->tun_info = *tun_info;
         e->tunnel_type = tunnel_type;
         INIT_LIST_HEAD(&e->flows);
  
-       err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
+       if (family == AF_INET)
+               err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
+       else if (family == AF_INET6)
+               err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev);
+
         if (err)
                 goto out_err;
  
@@ -986,7 +1178,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
  
         if (IS_ERR(flow->rule)) {
                 err = PTR_ERR(flow->rule);
-               goto err_free;
+               goto err_del_rule;
         }
  
         err = rhashtable_insert_fast(&tc->ht, &flow->node,
@@ -997,7 +1189,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
         goto out;
  
  err_del_rule:
-       mlx5_del_flow_rules(flow->rule);
+       mlx5e_tc_del_flow(priv, flow);
  
  err_free:
         kfree(flow);