RDMA/cma: Ensure rdma_addr_cancel() happens before issuing more requests

author Jason Gunthorpe <jgg@nvidia.com>

Thu, 16 Sep 2021 18:34:46 +0000 (15:34 -0300)

committer Jason Gunthorpe <jgg@nvidia.com>

Thu, 23 Sep 2021 20:03:09 +0000 (17:03 -0300)
author Jason Gunthorpe <jgg@nvidia.com>
Thu, 16 Sep 2021 18:34:46 +0000 (15:34 -0300)
committer Jason Gunthorpe <jgg@nvidia.com>
Thu, 23 Sep 2021 20:03:09 +0000 (17:03 -0300)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c

index 8862b0e572f0fe6ecf329e6e03c66c46a868eba1..704ce595542c5735e2390ebedc2cf2697333b8e5 100644 (file)
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1783,6 +1783,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
  {
         switch (state) {
         case RDMA_CM_ADDR_QUERY:
+               /*
+                * We can skip rdma_addr_cancel() based on state: only
+                * RDMA_CM_ADDR_QUERY has a work item that could still execute.
+                * Note that the addr_handler work could still be exiting
+                * outside this state; however, due to the interaction with the
+                * handler_mutex, the work is guaranteed not to touch id_priv
+                * during exit.
+                */
                 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
                 break;
         case RDMA_CM_ROUTE_QUERY:
@@ -3425,6 +3433,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
                 if (dst_addr->sa_family == AF_IB) {
                         ret = cma_resolve_ib_addr(id_priv);
                 } else {
+                       /*
+                        * The FSM can return to RDMA_CM_ADDR_BOUND after
+                        * rdma_resolve_ip() is called, e.g. through the error
+                        * path in addr_handler(). If this happens, the existing
+                        * request must be canceled before issuing a new one.
+                        * Since canceling a request is a bit slow and this
+                        * oddball path is rare, keep track of whether a request
+                        * has ever been issued. The flag stays set permanently:
+                        * the only cancel done here is immediately followed by
+                        * a new rdma_resolve_ip().
+                        */
+                       if (id_priv->used_resolve_ip)
+                               rdma_addr_cancel(&id->route.addr.dev_addr);
+                       else
+                               id_priv->used_resolve_ip = 1;
                         ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
                                               &id->route.addr.dev_addr,
                                               timeout_ms, addr_handler,
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h

index 5c463da9984536c6795ee7905fb08848235b4cb9..f92f101ea9818f8c1ecf11375353103dce234858 100644 (file)
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -91,6 +91,7 @@ struct rdma_id_private {
         u8                      afonly;
         u8                      timeout;
         u8                      min_rnr_timer;
+       u8 used_resolve_ip;
         enum ib_gid_type        gid_type;
  
         /*
author	Jason Gunthorpe <jgg@nvidia.com>
	Thu, 16 Sep 2021 18:34:46 +0000 (15:34 -0300)
committer	Jason Gunthorpe <jgg@nvidia.com>
	Thu, 23 Sep 2021 20:03:09 +0000 (17:03 -0300)
drivers/infiniband/core/cma.c		patch | blob | history
drivers/infiniband/core/cma_priv.h		patch | blob | history