IB/cm: Replace members of sa_path_rec with 'struct sgid_attr *'
authorParav Pandit <parav@mellanox.com>
Tue, 19 Jun 2018 07:59:19 +0000 (10:59 +0300)
committerJason Gunthorpe <jgg@mellanox.com>
Mon, 25 Jun 2018 20:19:57 +0000 (14:19 -0600)
While processing a path record entry in CM messages the associated GID
attribute is now also supplied.

Currently for RoCE a netdevice's net namespace pointer and ifindex are
stored in path record entry. Both of these fields of the netdev can change
anytime while processing CM messages. Additionally storing net namespace
without holding reference will lead to use-after-free crash. Therefore it
is removed. Netdevice information for RoCE is instead provided via
referenced gid attribute in ib_cm requests.

Such a design leads to a situation where the kernel can crash when the net
pointer becomes invalid. However today it is always initialized to
init_net, which cannot become invalid. In order to support processing
packets in any arbitrary namespace of the received packet, it is necessary
to avoid such conditions.

This patch removes the dependency on the net pointer and ifindex; instead
it will rely on SGID attribute which contains a pointer to netdev.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/uverbs_marshall.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
include/rdma/ib_sa.h

index 00c90d4f27bb1ba1d91a1ac00d06b4c7e87fbaa3..c2b7edf5857f2a20af9eeec464a268eccdadd01e 100644 (file)
@@ -508,31 +508,50 @@ static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
        return ret;
 }
 
-static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
+static struct cm_port *
+get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
 {
        struct cm_device *cm_dev;
        struct cm_port *port = NULL;
        unsigned long flags;
-       u8 p;
-       struct net_device *ndev = ib_get_ndev_from_path(path);
-
-       read_lock_irqsave(&cm.device_lock, flags);
-       list_for_each_entry(cm_dev, &cm.device_list, list) {
-               if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
-                                       sa_conv_pathrec_to_gid_type(path),
-                                       ndev, &p, NULL)) {
-                       port = cm_dev->port[p - 1];
-                       break;
+
+       if (attr) {
+               read_lock_irqsave(&cm.device_lock, flags);
+               list_for_each_entry(cm_dev, &cm.device_list, list) {
+                       if (cm_dev->ib_device == attr->device) {
+                               port = cm_dev->port[attr->port_num - 1];
+                               break;
+                       }
+               }
+               read_unlock_irqrestore(&cm.device_lock, flags);
+       } else {
+               /* SGID attribute can be NULL in following
+                * conditions.
+                * (a) Alternative path
+                * (b) IB link layer without GRH
+                * (c) LAP send messages
+                */
+               read_lock_irqsave(&cm.device_lock, flags);
+               list_for_each_entry(cm_dev, &cm.device_list, list) {
+                       attr = rdma_find_gid(cm_dev->ib_device,
+                                            &path->sgid,
+                                            sa_conv_pathrec_to_gid_type(path),
+                                            NULL);
+                       if (!IS_ERR(attr)) {
+                               port = cm_dev->port[attr->port_num - 1];
+                               break;
+                       }
                }
+               read_unlock_irqrestore(&cm.device_lock, flags);
+               if (port)
+                       rdma_put_gid_attr(attr);
        }
-       read_unlock_irqrestore(&cm.device_lock, flags);
-
-       if (ndev)
-               dev_put(ndev);
        return port;
 }
 
-static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
+static int cm_init_av_by_path(struct sa_path_rec *path,
+                             const struct ib_gid_attr *sgid_attr,
+                             struct cm_av *av,
                              struct cm_id_private *cm_id_priv)
 {
        struct rdma_ah_attr new_ah_attr;
@@ -540,7 +559,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
        struct cm_port *port;
        int ret;
 
-       port = get_cm_port_from_path(path);
+       port = get_cm_port_from_path(path, sgid_attr);
        if (!port)
                return -EINVAL;
        cm_dev = port->cm_dev;
@@ -562,7 +581,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
         * can be used to return an error response.
         */
        ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
-                                       &new_ah_attr);
+                                       &new_ah_attr, sgid_attr);
        if (ret)
                return ret;
 
@@ -1420,12 +1439,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
                goto out;
        }
 
-       ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
+       ret = cm_init_av_by_path(param->primary_path,
+                                param->ppath_sgid_attr, &cm_id_priv->av,
                                 cm_id_priv);
        if (ret)
                goto error1;
        if (param->alternate_path) {
-               ret = cm_init_av_by_path(param->alternate_path,
+               ret = cm_init_av_by_path(param->alternate_path, NULL,
                                         &cm_id_priv->alt_av, cm_id_priv);
                if (ret)
                        goto error1;
@@ -1980,10 +2000,6 @@ static int cm_req_handler(struct cm_work *work)
        if (gid_attr.ndev) {
                work->path[0].rec_type =
                        sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
-               sa_path_set_ifindex(&work->path[0],
-                                   gid_attr.ndev->ifindex);
-               sa_path_set_ndev(&work->path[0],
-                                dev_net(gid_attr.ndev));
                dev_put(gid_attr.ndev);
        } else {
                cm_path_set_rec_type(work->port->cm_dev->ib_device,
@@ -1999,7 +2015,7 @@ static int cm_req_handler(struct cm_work *work)
                sa_path_set_dmac(&work->path[0],
                                 cm_id_priv->av.ah_attr.roce.dmac);
        work->path[0].hop_limit = grh->hop_limit;
-       ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+       ret = cm_init_av_by_path(&work->path[0], &gid_attr, &cm_id_priv->av,
                                 cm_id_priv);
        if (ret) {
                int err;
@@ -2018,8 +2034,8 @@ static int cm_req_handler(struct cm_work *work)
                goto rejected;
        }
        if (cm_req_has_alt_path(req_msg)) {
-               ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
-                                        cm_id_priv);
+               ret = cm_init_av_by_path(&work->path[1], NULL,
+                                        &cm_id_priv->alt_av, cm_id_priv);
                if (ret) {
                        ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
                                       &work->path[0].sgid,
@@ -3142,7 +3158,7 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
                goto out;
        }
 
-       ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
+       ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
                                 cm_id_priv);
        if (ret)
                goto out;
@@ -3285,7 +3301,7 @@ static int cm_lap_handler(struct cm_work *work)
        if (ret)
                goto unlock;
 
-       cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
+       cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
                           cm_id_priv);
        cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
        cm_id_priv->tid = lap_msg->hdr.tid;
@@ -3487,7 +3503,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
                return -EINVAL;
 
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-       ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
+       ret = cm_init_av_by_path(param->path, param->sgid_attr,
+                                &cm_id_priv->av,
+                                cm_id_priv);
        if (ret)
                goto out;
 
index f0eeb43b388feb5dd8855d9c1e0e1847a23d32f8..a735ab4cdddaccb033aef25159808a16def0ddba 100644 (file)
@@ -2583,8 +2583,6 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
        route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
 
        route->path_rec->roce.route_resolved = true;
-       sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
-       sa_path_set_ifindex(route->path_rec, ndev->ifindex);
        sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
        return ndev;
 }
@@ -3510,7 +3508,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
                ib_init_ah_attr_from_path(id_priv->id.device,
                                          id_priv->id.port_num,
                                          id_priv->id.route.path_rec,
-                                         &event.param.ud.ah_attr);
+                                         &event.param.ud.ah_attr,
+                                         rep->sgid_attr);
                event.param.ud.qp_num = rep->qpn;
                event.param.ud.qkey = rep->qkey;
                event.event = RDMA_CM_EVENT_ESTABLISHED;
index b6da4a6095f10a2027b7f9d2e976d91a921e14a4..7005afb8a71263a5fa7bfb6151a02c541f39d171 100644 (file)
@@ -1229,18 +1229,12 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
 
 static int
 roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
-                            struct sa_path_rec *rec)
+                            struct sa_path_rec *rec,
+                            const struct ib_gid_attr *attr)
 {
        struct net_device *resolved_dev;
-       struct net_device *ndev;
        struct net_device *idev;
-       struct rdma_dev_addr dev_addr = {
-               .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
-                                sa_path_get_ifindex(rec) : 0),
-               .net = sa_path_get_ndev(rec) ?
-                       sa_path_get_ndev(rec) :
-                       &init_net
-       };
+       struct rdma_dev_addr dev_addr = {};
        union {
                struct sockaddr     _sockaddr;
                struct sockaddr_in  _sockaddr_in;
@@ -1250,6 +1244,14 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
 
        if (rec->roce.route_resolved)
                return 0;
+       if (!attr || !attr->ndev)
+               return -EINVAL;
+
+       dev_addr.bound_dev_if = attr->ndev->ifindex;
+       /* TODO: Use net from the ib_gid_attr once it is added to it,
+        * until than, limit itself to init_net.
+        */
+       dev_addr.net = &init_net;
 
        if (!device->get_netdev)
                return -EOPNOTSUPP;
@@ -1278,16 +1280,13 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
                ret = -ENODEV;
                goto done;
        }
-       ndev = ib_get_ndev_from_path(rec);
        rcu_read_lock();
-       if ((ndev && ndev != resolved_dev) ||
+       if (attr->ndev != resolved_dev ||
            (resolved_dev != idev &&
             !rdma_is_upper_dev_rcu(idev, resolved_dev)))
                ret = -EHOSTUNREACH;
        rcu_read_unlock();
        dev_put(resolved_dev);
-       if (ndev)
-               dev_put(ndev);
 done:
        dev_put(idev);
        if (!ret)
@@ -1297,19 +1296,18 @@ done:
 
 static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
                                   struct sa_path_rec *rec,
-                                  struct rdma_ah_attr *ah_attr)
+                                  struct rdma_ah_attr *ah_attr,
+                                  const struct ib_gid_attr *gid_attr)
 {
        enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
-       struct net_device *ndev;
-       const struct ib_gid_attr *gid_attr;
 
-       ndev = ib_get_ndev_from_path(rec);
-       gid_attr =
-               rdma_find_gid_by_port(device, &rec->sgid, type, port_num, ndev);
-       if (ndev)
-               dev_put(ndev);
-       if (IS_ERR(gid_attr))
-               return PTR_ERR(gid_attr);
+       if (!gid_attr) {
+               gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
+                                                port_num, NULL);
+               if (IS_ERR(gid_attr))
+                       return PTR_ERR(gid_attr);
+       } else
+               rdma_hold_gid_attr(gid_attr);
 
        rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
                                be32_to_cpu(rec->flow_label),
@@ -1318,9 +1316,26 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
        return 0;
 }
 
+/**
+ * ib_init_ah_attr_from_path - Initialize address handle attributes based on
+ *   an SA path record.
+ * @device: Device associated ah attributes initialization.
+ * @port_num: Port on the specified device.
+ * @rec: path record entry to use for ah attributes initialization.
+ * @ah_attr: address handle attributes to initialization from path record.
+ * @sgid_attr: SGID attribute to consider during initialization.
+ *
+ * When ib_init_ah_attr_from_path() returns success,
+ * (a) for IB link layer it optionally contains a reference to SGID attribute
+ * when GRH is present for IB link layer.
+ * (b) for RoCE link layer it contains a reference to SGID attribute.
+ * User must invoke rdma_destroy_ah_attr() to release reference to SGID
+ * attributes which are initialized using ib_init_ah_attr_from_path().
+ */
 int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
                              struct sa_path_rec *rec,
-                             struct rdma_ah_attr *ah_attr)
+                             struct rdma_ah_attr *ah_attr,
+                             const struct ib_gid_attr *gid_attr)
 {
        int ret = 0;
 
@@ -1331,7 +1346,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
        rdma_ah_set_static_rate(ah_attr, rec->rate);
 
        if (sa_path_is_roce(rec)) {
-               ret = roce_resolve_route_from_path(device, port_num, rec);
+               ret = roce_resolve_route_from_path(device, port_num, rec,
+                                                  gid_attr);
                if (ret)
                        return ret;
 
@@ -1348,7 +1364,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
        }
 
        if (rec->hop_limit > 0 || sa_path_is_roce(rec))
-               ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr);
+               ret = init_ah_attr_grh_fields(device, port_num,
+                                             rec, ah_attr, gid_attr);
        return ret;
 }
 EXPORT_SYMBOL(ib_init_ah_attr_from_path);
@@ -1556,8 +1573,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
                                  ARRAY_SIZE(path_rec_table),
                                  mad->data, &rec);
                        rec.rec_type = SA_PATH_REC_TYPE_IB;
-                       sa_path_set_ndev(&rec, NULL);
-                       sa_path_set_ifindex(&rec, 0);
                        sa_path_set_dmac_zero(&rec);
 
                        if (query->conv_pr) {
index bb372b4713a41c20dbbf01f6176d731b4dfad3d3..b8d715c68ca44e3c41bf87f1547384b40c82d4a4 100644 (file)
@@ -211,7 +211,5 @@ void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
 
        /* TODO: No need to set this */
        sa_path_set_dmac_zero(dst);
-       sa_path_set_ndev(dst, NULL);
-       sa_path_set_ifindex(dst, 0);
 }
 EXPORT_SYMBOL(ib_copy_path_rec_from_user);
index 45663f3117e54387dff6f297950a908e2fd84bae..983e52b871f3f35073416927e419e67cd81a547f 100644 (file)
@@ -770,7 +770,7 @@ static void path_rec_completion(int status,
                struct rdma_ah_attr av;
 
                if (!ib_init_ah_attr_from_path(priv->ca, priv->port,
-                                              pathrec, &av)) {
+                                              pathrec, &av, NULL)) {
                        ah = ipoib_create_ah(dev, priv->pd, &av);
                        rdma_destroy_ah_attr(&av);
                }
index bacb144f7780243a3413e90bc72a9dbcbb98c4c0..b6ddf2a1b9d8683fe64c33a98605519762360ce7 100644 (file)
@@ -172,12 +172,7 @@ struct sa_path_rec_ib {
  */
 struct sa_path_rec_roce {
        bool    route_resolved;
-       u8           dmac[ETH_ALEN];
-       /* ignored in IB */
-       int          ifindex;
-       /* ignored in IB */
-       struct net  *net;
-
+       u8      dmac[ETH_ALEN];
 };
 
 struct sa_path_rec_opa {
@@ -556,13 +551,10 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
                             enum ib_gid_type gid_type,
                             struct rdma_ah_attr *ah_attr);
 
-/**
- * ib_init_ah_attr_from_path - Initialize address handle attributes based on
- *   an SA path record.
- */
 int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
                              struct sa_path_rec *rec,
-                             struct rdma_ah_attr *ah_attr);
+                             struct rdma_ah_attr *ah_attr,
+                             const struct ib_gid_attr *sgid_attr);
 
 /**
  * ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec
@@ -667,45 +659,10 @@ static inline void sa_path_set_dmac_zero(struct sa_path_rec *rec)
                eth_zero_addr(rec->roce.dmac);
 }
 
-static inline void sa_path_set_ifindex(struct sa_path_rec *rec, int ifindex)
-{
-       if (sa_path_is_roce(rec))
-               rec->roce.ifindex = ifindex;
-}
-
-static inline void sa_path_set_ndev(struct sa_path_rec *rec, struct net *net)
-{
-       if (sa_path_is_roce(rec))
-               rec->roce.net = net;
-}
-
 static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec)
 {
        if (sa_path_is_roce(rec))
                return rec->roce.dmac;
        return NULL;
 }
-
-static inline int sa_path_get_ifindex(struct sa_path_rec *rec)
-{
-       if (sa_path_is_roce(rec))
-               return rec->roce.ifindex;
-       return 0;
-}
-
-static inline struct net *sa_path_get_ndev(struct sa_path_rec *rec)
-{
-       if (sa_path_is_roce(rec))
-               return rec->roce.net;
-       return NULL;
-}
-
-static inline struct net_device *ib_get_ndev_from_path(struct sa_path_rec *rec)
-{
-       return sa_path_get_ndev(rec) ?
-               dev_get_by_index(sa_path_get_ndev(rec),
-                                sa_path_get_ifindex(rec))
-               : NULL;
-}
-
 #endif /* IB_SA_H */