CIFS: SMBD: Upper layer performs SMB write via RDMA read through memory registration
[sfrench/cifs-2.6.git] / fs / cifs / smb2pdu.c
index 5cdcf1aafdf1f2b0d90006b199ea66ae8344f564..908d7770d15a316c48604eeadf48abfd52e160da 100644 (file)
@@ -48,6 +48,7 @@
 #include "smb2glob.h"
 #include "cifspdu.h"
 #include "cifs_spnego.h"
+#include "smbdirect.h"
 
 /*
  *  The following table defines the expected "StructureSize" of SMB2 requests
@@ -319,54 +320,16 @@ fill_small_buf(__le16 smb2_command, struct cifs_tcon *tcon, void *buf,
        *total_len = parmsize + sizeof(struct smb2_sync_hdr);
 }
 
-/* init request without RFC1001 length at the beginning */
-static int
-smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
-                   void **request_buf, unsigned int *total_len)
-{
-       int rc;
-       struct smb2_sync_hdr *shdr;
-
-       rc = smb2_reconnect(smb2_command, tcon);
-       if (rc)
-               return rc;
-
-       /* BB eventually switch this to SMB2 specific small buf size */
-       *request_buf = cifs_small_buf_get();
-       if (*request_buf == NULL) {
-               /* BB should we add a retry in here if not a writepage? */
-               return -ENOMEM;
-       }
-
-       shdr = (struct smb2_sync_hdr *)(*request_buf);
-
-       fill_small_buf(smb2_command, tcon, shdr, total_len);
-
-       if (tcon != NULL) {
-#ifdef CONFIG_CIFS_STATS2
-               uint16_t com_code = le16_to_cpu(smb2_command);
-
-               cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]);
-#endif
-               cifs_stats_inc(&tcon->num_smbs_sent);
-       }
-
-       return rc;
-}
-
 /*
  * Allocate and return pointer to an SMB request hdr, and set basic
  * SMB information in the SMB header. If the return code is zero, this
- * function must have filled in request_buf pointer. The returned buffer
- * has RFC1001 length at the beginning.
+ * function must have filled in request_buf pointer.
  */
 static int
-small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
-               void **request_buf)
+smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
+                   void **request_buf, unsigned int *total_len)
 {
        int rc;
-       unsigned int total_len;
-       struct smb2_pdu *pdu;
 
        rc = smb2_reconnect(smb2_command, tcon);
        if (rc)
@@ -379,12 +342,9 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
                return -ENOMEM;
        }
 
-       pdu = (struct smb2_pdu *)(*request_buf);
-
-       fill_small_buf(smb2_command, tcon, get_sync_hdr(pdu), &total_len);
-
-       /* Note this is only network field converted to big endian */
-       pdu->hdr.smb2_buf_length = cpu_to_be32(total_len);
+       fill_small_buf(smb2_command, tcon,
+                      (struct smb2_sync_hdr *)(*request_buf),
+                      total_len);
 
        if (tcon != NULL) {
 #ifdef CONFIG_CIFS_STATS2
@@ -2470,18 +2430,21 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
  */
 static int
 smb2_new_read_req(void **buf, unsigned int *total_len,
-                 struct cifs_io_parms *io_parms, unsigned int remaining_bytes,
-                 int request_type)
+       struct cifs_io_parms *io_parms, struct cifs_readdata *rdata,
+       unsigned int remaining_bytes, int request_type)
 {
        int rc = -EACCES;
        struct smb2_read_plain_req *req = NULL;
        struct smb2_sync_hdr *shdr;
+       struct TCP_Server_Info *server;
 
        rc = smb2_plain_req_init(SMB2_READ, io_parms->tcon, (void **) &req,
                                 total_len);
        if (rc)
                return rc;
-       if (io_parms->tcon->ses->server == NULL)
+
+       server = io_parms->tcon->ses->server;
+       if (server == NULL)
                return -ECONNABORTED;
 
        shdr = &req->sync_hdr;
@@ -2609,7 +2572,8 @@ smb2_async_readv(struct cifs_readdata *rdata)
 
        server = io_parms.tcon->ses->server;
 
-       rc = smb2_new_read_req((void **) &buf, &total_len, &io_parms, 0, 0);
+       rc = smb2_new_read_req(
+               (void **) &buf, &total_len, &io_parms, rdata, 0, 0);
        if (rc) {
                if (rc == -EAGAIN && rdata->credits) {
                        /* credits was reset by reconnect */
@@ -2674,7 +2638,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
        struct cifs_ses *ses = io_parms->tcon->ses;
 
        *nbytes = 0;
-       rc = smb2_new_read_req((void **)&req, &total_len, io_parms, 0, 0);
+       rc = smb2_new_read_req((void **)&req, &total_len, io_parms, NULL, 0, 0);
        if (rc)
                return rc;
 
@@ -2765,7 +2729,19 @@ smb2_writev_callback(struct mid_q_entry *mid)
                wdata->result = -EIO;
                break;
        }
-
+#ifdef CONFIG_CIFS_SMB_DIRECT
+       /*
+        * If this wdata has a registered MR, it can be freed now.
+        * The number of available MRs is limited, so it is important to
+        * recover a used MR as soon as the I/O is finished. Holding the MR
+        * later into the I/O process can result in an I/O deadlock, because
+        * no MR may be left to send the request on an I/O retry.
+        */
+       if (wdata->mr) {
+               smbd_deregister_mr(wdata->mr);
+               wdata->mr = NULL;
+       }
+#endif
        if (wdata->result)
                cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
 
@@ -2817,7 +2793,42 @@ smb2_async_writev(struct cifs_writedata *wdata,
        req->DataOffset = cpu_to_le16(
                                offsetof(struct smb2_write_req, Buffer));
        req->RemainingBytes = 0;
-
+#ifdef CONFIG_CIFS_SMB_DIRECT
+       /*
+        * If we want to do a server RDMA read, fill in and append a
+        * smbd_buffer_descriptor_v1 to the end of the write request
+        */
+       if (server->rdma && wdata->bytes >=
+               server->smbd_conn->rdma_readwrite_threshold) {
+
+               struct smbd_buffer_descriptor_v1 *v1;
+               bool need_invalidate = server->dialect == SMB30_PROT_ID;
+
+               wdata->mr = smbd_register_mr(
+                               server->smbd_conn, wdata->pages,
+                               wdata->nr_pages, wdata->tailsz,
+                               false, need_invalidate);
+               if (!wdata->mr) {
+                       rc = -ENOBUFS;
+                       goto async_writev_out;
+               }
+               req->Length = 0;
+               req->DataOffset = 0;
+               req->RemainingBytes =
+                       (wdata->nr_pages-1)*PAGE_SIZE + wdata->tailsz;
+               req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE;
+               if (need_invalidate)
+                       req->Channel = SMB2_CHANNEL_RDMA_V1;
+               req->WriteChannelInfoOffset =
+                       offsetof(struct smb2_write_req, Buffer);
+               req->WriteChannelInfoLength =
+                       sizeof(struct smbd_buffer_descriptor_v1);
+               v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0];
+               v1->offset = wdata->mr->mr->iova;
+               v1->token = wdata->mr->mr->rkey;
+               v1->length = wdata->mr->mr->length;
+       }
+#endif
        /* 4 for rfc1002 length field and 1 for Buffer */
        iov[0].iov_len = 4;
        rfc1002_marker = cpu_to_be32(total_len - 1 + wdata->bytes);
@@ -2831,11 +2842,22 @@ smb2_async_writev(struct cifs_writedata *wdata,
        rqst.rq_npages = wdata->nr_pages;
        rqst.rq_pagesz = wdata->pagesz;
        rqst.rq_tailsz = wdata->tailsz;
-
+#ifdef CONFIG_CIFS_SMB_DIRECT
+       if (wdata->mr) {
+               iov[1].iov_len += sizeof(struct smbd_buffer_descriptor_v1);
+               rqst.rq_npages = 0;
+       }
+#endif
        cifs_dbg(FYI, "async write at %llu %u bytes\n",
                 wdata->offset, wdata->bytes);
 
+#ifdef CONFIG_CIFS_SMB_DIRECT
+       /* For RDMA read, I/O size is in RemainingBytes not in Length */
+       if (!wdata->mr)
+               req->Length = cpu_to_le32(wdata->bytes);
+#else
        req->Length = cpu_to_le32(wdata->bytes);
+#endif
 
        if (wdata->credits) {
                shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,