Merge tag 'nfs-for-4.2-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 52df265b472a9b2b79574c7d9363acba26ea5d8b..891c4ede2c20ea8d8c6bc79ee080f353d4df13d7 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -80,7 +80,6 @@ static void
 rpcrdma_run_tasklet(unsigned long data)
 {
        struct rpcrdma_rep *rep;
-       void (*func)(struct rpcrdma_rep *);
        unsigned long flags;
 
        data = data;
@@ -89,14 +88,9 @@ rpcrdma_run_tasklet(unsigned long data)
                rep = list_entry(rpcrdma_tasklets_g.next,
                                 struct rpcrdma_rep, rr_list);
                list_del(&rep->rr_list);
-               func = rep->rr_func;
-               rep->rr_func = NULL;
                spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
 
-               if (func)
-                       func(rep);
-               else
-                       rpcrdma_recv_buffer_put(rep);
+               rpcrdma_reply_handler(rep);
 
                spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
        }
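
The loop above is the standard pattern for draining a shared list: dequeue one entry under the lock, then invoke the handler with the lock dropped so the handler is free to take other locks. A minimal userspace analogue of the same shape (pthread mutex standing in for the irq-safe spinlock; all names here are invented for illustration):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct rep {
            struct rep *next;
            int payload;
    };

    static struct rep *pending;         /* plays the role of rpcrdma_tasklets_g */
    static pthread_mutex_t pending_lock = PTHREAD_MUTEX_INITIALIZER;

    static void reply_handler(struct rep *rep) /* stand-in for rpcrdma_reply_handler */
    {
            printf("reply: %d\n", rep->payload);
            free(rep);
    }

    static void run_tasklet(void)
    {
            pthread_mutex_lock(&pending_lock);
            while (pending != NULL) {
                    struct rep *rep = pending;

                    pending = rep->next;                /* list_del() */
                    pthread_mutex_unlock(&pending_lock);

                    reply_handler(rep);                 /* runs with the lock dropped */

                    pthread_mutex_lock(&pending_lock);
            }
            pthread_mutex_unlock(&pending_lock);
    }
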
@@ -236,7 +230,7 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
                __func__, rep, wc->byte_len);
 
        rep->rr_len = wc->byte_len;
-       ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
+       ib_dma_sync_single_for_cpu(rep->rr_device,
                                   rdmab_addr(rep->rr_rdmabuf),
                                   rep->rr_len, DMA_FROM_DEVICE);
        prefetch(rdmab_to_msg(rep->rr_rdmabuf));
@@ -407,7 +401,7 @@ connected:
 
                pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
                        sap, rpc_get_port(sap),
-                       ia->ri_id->device->name,
+                       ia->ri_device->name,
                        ia->ri_ops->ro_displayname,
                        xprt->rx_buf.rb_max_requests,
                        ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
@@ -508,8 +502,9 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                rc = PTR_ERR(ia->ri_id);
                goto out1;
        }
+       ia->ri_device = ia->ri_id->device;
 
-       ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
+       ia->ri_pd = ib_alloc_pd(ia->ri_device);
        if (IS_ERR(ia->ri_pd)) {
                rc = PTR_ERR(ia->ri_pd);
                dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
@@ -517,7 +512,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                goto out2;
        }
 
-       rc = ib_query_device(ia->ri_id->device, devattr);
+       rc = ib_query_device(ia->ri_device, devattr);
        if (rc) {
                dprintk("RPC:       %s: ib_query_device failed %d\n",
                        __func__, rc);
@@ -526,7 +521,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 
        if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
                ia->ri_have_dma_lkey = 1;
-               ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
+               ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
        }
 
        if (memreg == RPCRDMA_FRMR) {
@@ -541,7 +536,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                }
        }
        if (memreg == RPCRDMA_MTHCAFMR) {
-               if (!ia->ri_id->device->alloc_fmr) {
+               if (!ia->ri_device->alloc_fmr) {
                        dprintk("RPC:       %s: MTHCAFMR registration "
                                "not supported by HCA\n", __func__);
                        memreg = RPCRDMA_ALLPHYSICAL;
@@ -590,9 +585,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
        dprintk("RPC:       %s: memory registration strategy is '%s'\n",
                __func__, ia->ri_ops->ro_displayname);
 
-       /* Else will do memory reg/dereg for each chunk */
-       ia->ri_memreg_strategy = memreg;
-
        rwlock_init(&ia->ri_qplock);
        return 0;
 
@@ -622,17 +614,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
                dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
                        __func__, rc);
        }
+
        if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
                if (ia->ri_id->qp)
                        rdma_destroy_qp(ia->ri_id);
                rdma_destroy_id(ia->ri_id);
                ia->ri_id = NULL;
        }
-       if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
-               rc = ib_dealloc_pd(ia->ri_pd);
-               dprintk("RPC:       %s: ib_dealloc_pd returned %i\n",
-                       __func__, rc);
-       }
+
+       /* If the pd is still busy, xprtrdma missed freeing a resource */
+       if (ia->ri_pd && !IS_ERR(ia->ri_pd))
+               WARN_ON(ib_dealloc_pd(ia->ri_pd));
 }
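
As the new comment notes, a protection domain cannot be deallocated while queue pairs, memory regions, or other resources still reference it, so checking the return value doubles as a leak detector at transport teardown. A hedged userspace sketch of that idea (the usecnt field and helper names are illustrative, not the verbs API):

    #include <stdio.h>

    struct pd {
            int usecnt;                 /* one per QP/MR/CQ still attached */
    };

    /* Returns 0 on success, -1 while objects still reference the domain,
     * loosely mirroring ib_dealloc_pd() failing on a busy PD.
     */
    static int dealloc_pd(struct pd *pd)
    {
            return pd->usecnt > 0 ? -1 : 0;
    }

    static void transport_close(struct pd *pd)
    {
            /* A nonzero return here means some MR or QP was never freed. */
            if (dealloc_pd(pd))
                    fprintf(stderr, "WARN: pd still busy, resource leaked\n");
    }
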
 
 /*
@@ -693,8 +685,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
 
        cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1;
-       sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
-                                 rpcrdma_cq_async_error_upcall, ep, &cq_attr);
+       sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall,
+                             rpcrdma_cq_async_error_upcall, ep, &cq_attr);
        if (IS_ERR(sendcq)) {
                rc = PTR_ERR(sendcq);
                dprintk("RPC:       %s: failed to create send CQ: %i\n",
@@ -710,8 +702,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        }
 
        cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1;
-       recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
-                                 rpcrdma_cq_async_error_upcall, ep, &cq_attr);
+       recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall,
+                             rpcrdma_cq_async_error_upcall, ep, &cq_attr);
        if (IS_ERR(recvcq)) {
                rc = PTR_ERR(recvcq);
                dprintk("RPC:       %s: failed to create recv CQ: %i\n",
@@ -817,8 +809,6 @@ retry:
                rpcrdma_flush_cqs(ep);
 
                xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
-               ia->ri_ops->ro_reset(xprt);
-
                id = rpcrdma_create_id(xprt, ia,
                                (struct sockaddr *)&xprt->rx_data.addr);
                if (IS_ERR(id)) {
@@ -832,7 +822,7 @@ retry:
                 * More stuff I haven't thought of!
                 * Rrrgh!
                 */
-               if (ia->ri_id->device != id->device) {
+               if (ia->ri_device != id->device) {
                        printk("RPC:       %s: can't reconnect on "
                                "different device!\n", __func__);
                        rdma_destroy_id(id);
@@ -974,7 +964,8 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
                goto out_free;
        }
 
-       rep->rr_buffer = &r_xprt->rx_buf;
+       rep->rr_device = ia->ri_device;
+       rep->rr_rxprt = r_xprt;
        return rep;
 
 out_free:
@@ -1098,31 +1089,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
        kfree(buf->rb_pool);
 }
 
-/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
- * some req segments uninitialized.
- */
-static void
-rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
+struct rpcrdma_mw *
+rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
 {
-       if (*mw) {
-               list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
-               *mw = NULL;
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+       struct rpcrdma_mw *mw = NULL;
+
+       spin_lock(&buf->rb_mwlock);
+       if (!list_empty(&buf->rb_mws)) {
+               mw = list_first_entry(&buf->rb_mws,
+                                     struct rpcrdma_mw, mw_list);
+               list_del_init(&mw->mw_list);
        }
+       spin_unlock(&buf->rb_mwlock);
+
+       if (!mw)
+               pr_err("RPC:       %s: no MWs available\n", __func__);
+       return mw;
 }
 
-/* Cycle mw's back in reverse order, and "spin" them.
- * This delays and scrambles reuse as much as possible.
- */
-static void
-rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
+void
+rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
 {
-       struct rpcrdma_mr_seg *seg = req->rl_segments;
-       struct rpcrdma_mr_seg *seg1 = seg;
-       int i;
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 
-       for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
-               rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
-       rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
+       spin_lock(&buf->rb_mwlock);
+       list_add_tail(&mw->mw_list, &buf->rb_mws);
+       spin_unlock(&buf->rb_mwlock);
 }
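
rpcrdma_get_mw() and rpcrdma_put_mw() reduce the MW pool to a plain lock-protected free list, handing out one MW at a time; that is what lets rpcrdma_buffer_get() below shed its per-strategy FRMR/FMR refill logic. A standalone sketch of the same pattern (userspace, illustrative names; note the kernel code adds at the tail for FIFO reuse, while this sketch pushes LIFO for brevity):

    #include <pthread.h>
    #include <stddef.h>
    #include <stdio.h>

    struct mw {
            struct mw *next;
    };

    struct pool {
            struct mw *free_list;
            pthread_mutex_t lock;
    };

    static struct mw *get_mw(struct pool *p)
    {
            struct mw *mw = NULL;

            pthread_mutex_lock(&p->lock);
            if (p->free_list) {
                    mw = p->free_list;
                    p->free_list = mw->next;    /* list_del_init() */
            }
            pthread_mutex_unlock(&p->lock);

            if (!mw)
                    fprintf(stderr, "no MWs available\n");
            return mw;
    }

    static void put_mw(struct pool *p, struct mw *mw)
    {
            pthread_mutex_lock(&p->lock);
            mw->next = p->free_list;            /* kernel: list_add_tail() */
            p->free_list = mw;
            pthread_mutex_unlock(&p->lock);
    }
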
 
 static void
@@ -1132,115 +1125,10 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
        req->rl_niovs = 0;
        if (req->rl_reply) {
                buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
-               req->rl_reply->rr_func = NULL;
                req->rl_reply = NULL;
        }
 }
 
-/* rpcrdma_unmap_one() was already done during deregistration.
- * Redo only the ib_post_send().
- */
-static void
-rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
-{
-       struct rpcrdma_xprt *r_xprt =
-                               container_of(ia, struct rpcrdma_xprt, rx_ia);
-       struct ib_send_wr invalidate_wr, *bad_wr;
-       int rc;
-
-       dprintk("RPC:       %s: FRMR %p is stale\n", __func__, r);
-
-       /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
-       r->r.frmr.fr_state = FRMR_IS_INVALID;
-
-       memset(&invalidate_wr, 0, sizeof(invalidate_wr));
-       invalidate_wr.wr_id = (unsigned long)(void *)r;
-       invalidate_wr.opcode = IB_WR_LOCAL_INV;
-       invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
-       DECR_CQCOUNT(&r_xprt->rx_ep);
-
-       dprintk("RPC:       %s: frmr %p invalidating rkey %08x\n",
-               __func__, r, r->r.frmr.fr_mr->rkey);
-
-       read_lock(&ia->ri_qplock);
-       rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
-       read_unlock(&ia->ri_qplock);
-       if (rc) {
-               /* Force rpcrdma_buffer_get() to retry */
-               r->r.frmr.fr_state = FRMR_IS_STALE;
-               dprintk("RPC:       %s: ib_post_send failed, %i\n",
-                       __func__, rc);
-       }
-}
-
-static void
-rpcrdma_retry_flushed_linv(struct list_head *stale,
-                          struct rpcrdma_buffer *buf)
-{
-       struct rpcrdma_ia *ia = rdmab_to_ia(buf);
-       struct list_head *pos;
-       struct rpcrdma_mw *r;
-       unsigned long flags;
-
-       list_for_each(pos, stale) {
-               r = list_entry(pos, struct rpcrdma_mw, mw_list);
-               rpcrdma_retry_local_inv(r, ia);
-       }
-
-       spin_lock_irqsave(&buf->rb_lock, flags);
-       list_splice_tail(stale, &buf->rb_mws);
-       spin_unlock_irqrestore(&buf->rb_lock, flags);
-}
-
-static struct rpcrdma_req *
-rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
-                        struct list_head *stale)
-{
-       struct rpcrdma_mw *r;
-       int i;
-
-       i = RPCRDMA_MAX_SEGS - 1;
-       while (!list_empty(&buf->rb_mws)) {
-               r = list_entry(buf->rb_mws.next,
-                              struct rpcrdma_mw, mw_list);
-               list_del(&r->mw_list);
-               if (r->r.frmr.fr_state == FRMR_IS_STALE) {
-                       list_add(&r->mw_list, stale);
-                       continue;
-               }
-               req->rl_segments[i].rl_mw = r;
-               if (unlikely(i-- == 0))
-                       return req;     /* Success */
-       }
-
-       /* Not enough entries on rb_mws for this req */
-       rpcrdma_buffer_put_sendbuf(req, buf);
-       rpcrdma_buffer_put_mrs(req, buf);
-       return NULL;
-}
-
-static struct rpcrdma_req *
-rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
-{
-       struct rpcrdma_mw *r;
-       int i;
-
-       i = RPCRDMA_MAX_SEGS - 1;
-       while (!list_empty(&buf->rb_mws)) {
-               r = list_entry(buf->rb_mws.next,
-                              struct rpcrdma_mw, mw_list);
-               list_del(&r->mw_list);
-               req->rl_segments[i].rl_mw = r;
-               if (unlikely(i-- == 0))
-                       return req;     /* Success */
-       }
-
-       /* Not enough entries on rb_mws for this req */
-       rpcrdma_buffer_put_sendbuf(req, buf);
-       rpcrdma_buffer_put_mrs(req, buf);
-       return NULL;
-}
-
 /*
  * Get a set of request/reply buffers.
  *
@@ -1253,12 +1141,11 @@ rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
 struct rpcrdma_req *
 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
 {
-       struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
-       struct list_head stale;
        struct rpcrdma_req *req;
        unsigned long flags;
 
        spin_lock_irqsave(&buffers->rb_lock, flags);
+
        if (buffers->rb_send_index == buffers->rb_max_requests) {
                spin_unlock_irqrestore(&buffers->rb_lock, flags);
                dprintk("RPC:       %s: out of request buffers\n", __func__);
@@ -1277,20 +1164,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
        }
        buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
 
-       INIT_LIST_HEAD(&stale);
-       switch (ia->ri_memreg_strategy) {
-       case RPCRDMA_FRMR:
-               req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
-               break;
-       case RPCRDMA_MTHCAFMR:
-               req = rpcrdma_buffer_get_fmrs(req, buffers);
-               break;
-       default:
-               break;
-       }
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
-       if (!list_empty(&stale))
-               rpcrdma_retry_flushed_linv(&stale, buffers);
        return req;
 }
 
@@ -1302,19 +1176,10 @@ void
 rpcrdma_buffer_put(struct rpcrdma_req *req)
 {
        struct rpcrdma_buffer *buffers = req->rl_buffer;
-       struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
        unsigned long flags;
 
        spin_lock_irqsave(&buffers->rb_lock, flags);
        rpcrdma_buffer_put_sendbuf(req, buffers);
-       switch (ia->ri_memreg_strategy) {
-       case RPCRDMA_FRMR:
-       case RPCRDMA_MTHCAFMR:
-               rpcrdma_buffer_put_mrs(req, buffers);
-               break;
-       default:
-               break;
-       }
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
 }
 
@@ -1344,10 +1209,9 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
 void
 rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
 {
-       struct rpcrdma_buffer *buffers = rep->rr_buffer;
+       struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
        unsigned long flags;
 
-       rep->rr_func = NULL;
        spin_lock_irqsave(&buffers->rb_lock, flags);
        buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
@@ -1376,9 +1240,9 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
        /*
         * All memory passed here was kmalloc'ed, therefore phys-contiguous.
         */
-       iov->addr = ib_dma_map_single(ia->ri_id->device,
+       iov->addr = ib_dma_map_single(ia->ri_device,
                        va, len, DMA_BIDIRECTIONAL);
-       if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
+       if (ib_dma_mapping_error(ia->ri_device, iov->addr))
                return -ENOMEM;
 
        iov->length = len;
@@ -1422,8 +1286,8 @@ rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
 {
        int rc;
 
-       ib_dma_unmap_single(ia->ri_id->device,
-                       iov->addr, iov->length, DMA_BIDIRECTIONAL);
+       ib_dma_unmap_single(ia->ri_device,
+                           iov->addr, iov->length, DMA_BIDIRECTIONAL);
 
        if (NULL == mr)
                return 0;
@@ -1516,15 +1380,18 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
        send_wr.num_sge = req->rl_niovs;
        send_wr.opcode = IB_WR_SEND;
        if (send_wr.num_sge == 4)       /* no need to sync any pad (constant) */
-               ib_dma_sync_single_for_device(ia->ri_id->device,
-                       req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
-                       DMA_TO_DEVICE);
-       ib_dma_sync_single_for_device(ia->ri_id->device,
-               req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
-               DMA_TO_DEVICE);
-       ib_dma_sync_single_for_device(ia->ri_id->device,
-               req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
-               DMA_TO_DEVICE);
+               ib_dma_sync_single_for_device(ia->ri_device,
+                                             req->rl_send_iov[3].addr,
+                                             req->rl_send_iov[3].length,
+                                             DMA_TO_DEVICE);
+       ib_dma_sync_single_for_device(ia->ri_device,
+                                     req->rl_send_iov[1].addr,
+                                     req->rl_send_iov[1].length,
+                                     DMA_TO_DEVICE);
+       ib_dma_sync_single_for_device(ia->ri_device,
+                                     req->rl_send_iov[0].addr,
+                                     req->rl_send_iov[0].length,
+                                     DMA_TO_DEVICE);
 
        if (DECR_CQCOUNT(ep) > 0)
                send_wr.send_flags = 0;
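
The DECR_CQCOUNT() test above is the usual verbs optimization of leaving most sends unsignaled and forcing a signaled completion periodically, so send-queue slots are reaped before the CQ can overflow. A minimal sketch of the shape, assuming an interval constant of our own choosing (the counter stands in for ep->rep_cqcount handling and is not from this patch):

    #define SIGNAL_EVERY    32          /* assumed interval, illustrative only */
    #define MY_SEND_SIGNALED 0x1        /* stand-in for IB_SEND_SIGNALED */

    static int send_countdown = SIGNAL_EVERY;

    static int next_send_flags(void)
    {
            if (--send_countdown > 0)
                    return 0;           /* unsignaled: no completion entry */
            send_countdown = SIGNAL_EVERY;
            return MY_SEND_SIGNALED;    /* force a CQE to reap SQ slots */
    }
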
@@ -1557,7 +1424,7 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
        recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
        recv_wr.num_sge = 1;
 
-       ib_dma_sync_single_for_cpu(ia->ri_id->device,
+       ib_dma_sync_single_for_cpu(ia->ri_device,
                                   rdmab_addr(rep->rr_rdmabuf),
                                   rdmab_length(rep->rr_rdmabuf),
                                   DMA_BIDIRECTIONAL);
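
Both paths above follow the streaming-DMA ownership rules: ib_dma_sync_single_for_device() hands a buffer to the HCA before a post, and ib_dma_sync_single_for_cpu() reclaims it before the CPU reads what the device wrote. Schematically, in pseudo-kernel C (the helper names and parameters are illustrative; only the ib_dma_sync_single_* and ib_post_send calls are the real API, and error handling is elided):

    #include <rdma/ib_verbs.h>

    static void post_send_synced(struct ib_device *dev, struct ib_qp *qp,
                                 struct ib_send_wr *wr, u64 addr, size_t len)
    {
            struct ib_send_wr *bad_wr;

            /* CPU filled in the RPC header: flush it toward the device. */
            ib_dma_sync_single_for_device(dev, addr, len, DMA_TO_DEVICE);
            if (ib_post_send(qp, wr, &bad_wr))
                    pr_err("post_send failed\n");
    }

    static void reap_recv_synced(struct ib_device *dev, u64 addr, size_t len)
    {
            /* Device wrote the reply: make it visible to the CPU. */
            ib_dma_sync_single_for_cpu(dev, addr, len, DMA_FROM_DEVICE);
            /* ... parse the reply ... */
    }
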