Merge tag 'nfs-for-5.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 12 Mar 2019 21:50:42 +0000 (14:50 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 12 Mar 2019 21:50:42 +0000 (14:50 -0700)
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:
   - Fixes for NFS I/O request leakages
   - Fix error handling paths in the NFS I/O recoalescing code
   - Reinitialise NFSv4.1 sequence results before retransmitting a
     request
   - Fix a soft lockup in the delegation recovery code
   - Bulk destroy of layouts needs to be safe w.r.t. umount
   - Prevent thundering herd issues when the SUNRPC socket is not
     connected
   - Respect RPC call timeouts when retrying transmission

  Features:
   - Convert rpc auth layer to use xdr_streams
   - Config option to disable insecure RPCSEC_GSS crypto types
   - Reduce size of RPC receive buffers
   - Readdirplus optimization by cache mechanism
   - Convert SUNRPC socket send code to use iov_iter()
   - SUNRPC micro-optimisations to avoid indirect calls
   - Add support for the pNFS LAYOUTERROR operation and use it with the
     pNFS/flexfiles driver
   - Add trace events to report non-zero NFS status codes
   - Various removals of unnecessary dprintks

  Bugfixes and cleanups:
   - Fix a number of sparse warnings and documentation format warnings
   - Fix nfs_parse_devname to not modify its argument
   - Fix potential corruption of page being written through pNFS/blocks
   - Fix xfstest generic/099 failures on NFSv3
   - Avoid NFSv4.1 "false retries" when RPC calls are interrupted
   - Abort I/O early if the pNFS/flexfiles layout segment was
     invalidated
   - Avoid unnecessary pNFS/flexfiles layout invalidations"

* tag 'nfs-for-5.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (90 commits)
  SUNRPC: Take the transport send lock before binding+connecting
  SUNRPC: Micro-optimise when the task is known not to be sleeping
  SUNRPC: Check whether the task was transmitted before rebind/reconnect
  SUNRPC: Remove redundant calls to RPC_IS_QUEUED()
  SUNRPC: Clean up
  SUNRPC: Respect RPC call timeouts when retrying transmission
  SUNRPC: Fix up RPC back channel transmission
  SUNRPC: Prevent thundering herd when the socket is not connected
  SUNRPC: Allow dynamic allocation of back channel slots
  NFSv4.1: Bump the default callback session slot count to 16
  SUNRPC: Convert remaining GFP_NOIO, and GFP_NOWAIT sites in sunrpc
  NFS/flexfiles: Clean up mirror DS initialisation
  NFS/flexfiles: Remove dead code in ff_layout_mirror_valid()
  NFS/flexfile: Simplify nfs4_ff_layout_select_ds_stateid()
  NFS/flexfile: Simplify nfs4_ff_layout_ds_version()
  NFS/flexfiles: Simplify ff_layout_get_ds_cred()
  NFS/flexfiles: Simplify nfs4_ff_find_or_create_ds_client()
  NFS/flexfiles: Simplify nfs4_ff_layout_select_ds_fh()
  NFS/flexfiles: Speed up read failover when DSes are down
  NFS/flexfiles: Don't invalidate DS deviceids for being unresponsive
  ...

83 files changed:
fs/lockd/clnt4xdr.c
fs/lockd/clntxdr.c
fs/nfs/callback_xdr.c
fs/nfs/delegation.c
fs/nfs/delegation.h
fs/nfs/dir.c
fs/nfs/direct.c
fs/nfs/file.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/flexfilelayout/flexfilelayout.h
fs/nfs/flexfilelayout/flexfilelayoutdev.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/io.c
fs/nfs/namespace.c
fs/nfs/nfs2xdr.c
fs/nfs/nfs3acl.c
fs/nfs/nfs3xdr.c
fs/nfs/nfs42.h
fs/nfs/nfs42proc.c
fs/nfs/nfs42xdr.c
fs/nfs/nfs4client.c
fs/nfs/nfs4namespace.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4session.c
fs/nfs/nfs4session.h
fs/nfs/nfs4state.c
fs/nfs/nfs4trace.h
fs/nfs/nfs4xdr.c
fs/nfs/nfstrace.c
fs/nfs/nfstrace.h
fs/nfs/pagelist.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/pnfs_dev.c
fs/nfs/read.c
fs/nfs/super.c
fs/nfs/unlink.c
fs/nfs/write.c
fs/nfsd/nfs4callback.c
include/linux/nfs4.h
include/linux/nfs_fs_sb.h
include/linux/nfs_page.h
include/linux/nfs_xdr.h
include/linux/sunrpc/auth.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/gss_krb5_enctypes.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/xdr.h
include/linux/sunrpc/xprt.h
include/linux/sunrpc/xprtsock.h
include/trace/events/rpcgss.h [new file with mode: 0644]
include/trace/events/rpcrdma.h
include/trace/events/sunrpc.h
net/sunrpc/Kconfig
net/sunrpc/auth.c
net/sunrpc/auth_gss/Makefile
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/auth_gss/gss_krb5_mech.c
net/sunrpc/auth_gss/gss_krb5_wrap.c
net/sunrpc/auth_gss/gss_mech_switch.c
net/sunrpc/auth_gss/gss_rpc_upcall.c
net/sunrpc/auth_gss/gss_rpc_upcall.h
net/sunrpc/auth_gss/gss_rpc_xdr.c
net/sunrpc/auth_gss/gss_rpc_xdr.h
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/auth_gss/trace.c [new file with mode: 0644]
net/sunrpc/auth_null.c
net/sunrpc/auth_unix.c
net/sunrpc/backchannel_rqst.c
net/sunrpc/clnt.c
net/sunrpc/sched.c
net/sunrpc/svc.c
net/sunrpc/xdr.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/backchannel.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/sunrpc/xprtsock.c

index 214a2fa1f1e392a991e1a904316fca3ba3927075..7df6324ccb8ab33ac9e04de7cbcfb11bb4eb1413 100644 (file)
@@ -74,17 +74,6 @@ static void nlm4_compute_offsets(const struct nlm_lock *lock,
                *l_len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
 }
 
-/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
-       dprintk("lockd: %s prematurely hit the end of our receive buffer. "
-               "Remaining buffer length is %tu words.\n",
-               func, xdr->end - xdr->p);
-}
-
-
 /*
  * Encode/decode NLMv4 basic data types
  *
@@ -176,7 +165,6 @@ out_size:
        dprintk("NFS: returned cookie was too long: %u\n", length);
        return -EIO;
 out_overflow:
-       print_overflow_msg(__func__, xdr);
        return -EIO;
 }
 
@@ -236,7 +224,6 @@ out_bad_xdr:
                        __func__, be32_to_cpup(p));
        return -EIO;
 out_overflow:
-       print_overflow_msg(__func__, xdr);
        return -EIO;
 }
 
@@ -309,7 +296,6 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
 out:
        return error;
 out_overflow:
-       print_overflow_msg(__func__, xdr);
        return -EIO;
 }
 
index 747b9c8c940ac4882fb38606b6294df140bd517c..4df62f6355295556a4efa65a7148ccc14334a975 100644 (file)
@@ -70,17 +70,6 @@ static void nlm_compute_offsets(const struct nlm_lock *lock,
                *l_len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
 }
 
-/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
-       dprintk("lockd: %s prematurely hit the end of our receive buffer. "
-               "Remaining buffer length is %tu words.\n",
-               func, xdr->end - xdr->p);
-}
-
-
 /*
  * Encode/decode NLMv3 basic data types
  *
@@ -173,7 +162,6 @@ out_size:
        dprintk("NFS: returned cookie was too long: %u\n", length);
        return -EIO;
 out_overflow:
-       print_overflow_msg(__func__, xdr);
        return -EIO;
 }
 
@@ -231,7 +219,6 @@ out_enum:
                __func__, be32_to_cpup(p));
        return -EIO;
 out_overflow:
-       print_overflow_msg(__func__, xdr);
        return -EIO;
 }
 
@@ -303,7 +290,6 @@ static int decode_nlm_holder(struct xdr_stream *xdr, struct nlm_res *result)
 out:
        return error;
 out_overflow:
-       print_overflow_msg(__func__, xdr);
        return -EIO;
 }
 
index a87a562734077c221884edaf7962086af6e7a643..06233bfa6d73884ed7e52a30c64054c6b97b7457 100644 (file)
@@ -72,16 +72,6 @@ static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
        return xdr_ressize_check(rqstp, p);
 }
 
-static __be32 *read_buf(struct xdr_stream *xdr, size_t nbytes)
-{
-       __be32 *p;
-
-       p = xdr_inline_decode(xdr, nbytes);
-       if (unlikely(p == NULL))
-               printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n");
-       return p;
-}
-
 static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len,
                const char **str, size_t maxlen)
 {
@@ -98,13 +88,13 @@ static __be32 decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
 {
        __be32 *p;
 
-       p = read_buf(xdr, 4);
+       p = xdr_inline_decode(xdr, 4);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_RESOURCE);
        fh->size = ntohl(*p);
        if (fh->size > NFS4_FHSIZE)
                return htonl(NFS4ERR_BADHANDLE);
-       p = read_buf(xdr, fh->size);
+       p = xdr_inline_decode(xdr, fh->size);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_RESOURCE);
        memcpy(&fh->data[0], p, fh->size);
@@ -117,11 +107,11 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
        __be32 *p;
        unsigned int attrlen;
 
-       p = read_buf(xdr, 4);
+       p = xdr_inline_decode(xdr, 4);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_RESOURCE);
        attrlen = ntohl(*p);
-       p = read_buf(xdr, attrlen << 2);
+       p = xdr_inline_decode(xdr, attrlen << 2);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_RESOURCE);
        if (likely(attrlen > 0))
@@ -135,7 +125,7 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
 {
        __be32 *p;
 
-       p = read_buf(xdr, NFS4_STATEID_SIZE);
+       p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_RESOURCE);
        memcpy(stateid->data, p, NFS4_STATEID_SIZE);
@@ -156,7 +146,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
        status = decode_string(xdr, &hdr->taglen, &hdr->tag, CB_OP_TAGLEN_MAXSZ);
        if (unlikely(status != 0))
                return status;
-       p = read_buf(xdr, 12);
+       p = xdr_inline_decode(xdr, 12);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_RESOURCE);
        hdr->minorversion = ntohl(*p++);
@@ -176,7 +166,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
 static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
 {
        __be32 *p;
-       p = read_buf(xdr, 4);
+       p = xdr_inline_decode(xdr, 4);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_RESOURCE_HDR);
        *op = ntohl(*p);
@@ -205,7 +195,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp,
        status = decode_delegation_stateid(xdr, &args->stateid);
        if (unlikely(status != 0))
                return status;
-       p = read_buf(xdr, 4);
+       p = xdr_inline_decode(xdr, 4);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_RESOURCE);
        args->truncate = ntohl(*p);
@@ -227,7 +217,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
        __be32 status = 0;
        uint32_t iomode;
 
-       p = read_buf(xdr, 4 * sizeof(uint32_t));
+       p = xdr_inline_decode(xdr, 4 * sizeof(uint32_t));
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_BADXDR);
 
@@ -245,14 +235,14 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
                if (unlikely(status != 0))
                        return status;
 
-               p = read_buf(xdr, 2 * sizeof(uint64_t));
+               p = xdr_inline_decode(xdr, 2 * sizeof(uint64_t));
                if (unlikely(p == NULL))
                        return htonl(NFS4ERR_BADXDR);
                p = xdr_decode_hyper(p, &args->cbl_range.offset);
                p = xdr_decode_hyper(p, &args->cbl_range.length);
                return decode_layout_stateid(xdr, &args->cbl_stateid);
        } else if (args->cbl_recall_type == RETURN_FSID) {
-               p = read_buf(xdr, 2 * sizeof(uint64_t));
+               p = xdr_inline_decode(xdr, 2 * sizeof(uint64_t));
                if (unlikely(p == NULL))
                        return htonl(NFS4ERR_BADXDR);
                p = xdr_decode_hyper(p, &args->cbl_fsid.major);
@@ -275,7 +265,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
        args->ndevs = 0;
 
        /* Num of device notifications */
-       p = read_buf(xdr, sizeof(uint32_t));
+       p = xdr_inline_decode(xdr, sizeof(uint32_t));
        if (unlikely(p == NULL)) {
                status = htonl(NFS4ERR_BADXDR);
                goto out;
@@ -298,7 +288,8 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
        for (i = 0; i < n; i++) {
                struct cb_devicenotifyitem *dev = &args->devs[i];
 
-               p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE);
+               p = xdr_inline_decode(xdr, (4 * sizeof(uint32_t)) +
+                                     NFS4_DEVICEID4_SIZE);
                if (unlikely(p == NULL)) {
                        status = htonl(NFS4ERR_BADXDR);
                        goto err;
@@ -329,7 +320,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
                p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
 
                if (dev->cbd_layout_type == NOTIFY_DEVICEID4_CHANGE) {
-                       p = read_buf(xdr, sizeof(uint32_t));
+                       p = xdr_inline_decode(xdr, sizeof(uint32_t));
                        if (unlikely(p == NULL)) {
                                status = htonl(NFS4ERR_BADXDR);
                                goto err;
@@ -359,7 +350,7 @@ static __be32 decode_sessionid(struct xdr_stream *xdr,
 {
        __be32 *p;
 
-       p = read_buf(xdr, NFS4_MAX_SESSIONID_LEN);
+       p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_RESOURCE);
 
@@ -379,13 +370,13 @@ static __be32 decode_rc_list(struct xdr_stream *xdr,
                goto out;
 
        status = htonl(NFS4ERR_RESOURCE);
-       p = read_buf(xdr, sizeof(uint32_t));
+       p = xdr_inline_decode(xdr, sizeof(uint32_t));
        if (unlikely(p == NULL))
                goto out;
 
        rc_list->rcl_nrefcalls = ntohl(*p++);
        if (rc_list->rcl_nrefcalls) {
-               p = read_buf(xdr,
+               p = xdr_inline_decode(xdr,
                             rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t));
                if (unlikely(p == NULL))
                        goto out;
@@ -418,7 +409,7 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
        if (status)
                return status;
 
-       p = read_buf(xdr, 5 * sizeof(uint32_t));
+       p = xdr_inline_decode(xdr, 5 * sizeof(uint32_t));
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_RESOURCE);
 
@@ -461,7 +452,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
        uint32_t bitmap[2];
        __be32 *p, status;
 
-       p = read_buf(xdr, 4);
+       p = xdr_inline_decode(xdr, 4);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_BADXDR);
        args->craa_objs_to_keep = ntohl(*p++);
@@ -480,7 +471,7 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp,
        struct cb_recallslotargs *args = argp;
        __be32 *p;
 
-       p = read_buf(xdr, 4);
+       p = xdr_inline_decode(xdr, 4);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_BADXDR);
        args->crsa_target_highest_slotid = ntohl(*p++);
@@ -492,14 +483,14 @@ static __be32 decode_lockowner(struct xdr_stream *xdr, struct cb_notify_lock_arg
        __be32          *p;
        unsigned int    len;
 
-       p = read_buf(xdr, 12);
+       p = xdr_inline_decode(xdr, 12);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_BADXDR);
 
        p = xdr_decode_hyper(p, &args->cbnl_owner.clientid);
        len = be32_to_cpu(*p);
 
-       p = read_buf(xdr, len);
+       p = xdr_inline_decode(xdr, len);
        if (unlikely(p == NULL))
                return htonl(NFS4ERR_BADXDR);
 
@@ -537,7 +528,7 @@ static __be32 decode_write_response(struct xdr_stream *xdr,
        __be32 *p;
 
        /* skip the always zero field */
-       p = read_buf(xdr, 4);
+       p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
                goto out;
        p++;
@@ -577,7 +568,7 @@ static __be32 decode_offload_args(struct svc_rqst *rqstp,
                return status;
 
        /* decode status */
-       p = read_buf(xdr, 4);
+       p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
                goto out;
        args->error = ntohl(*p++);
@@ -943,10 +934,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
        };
        unsigned int nops = 0;
 
-       xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
+       xdr_init_decode(&xdr_in, &rqstp->rq_arg,
+                       rqstp->rq_arg.head[0].iov_base, NULL);
 
        p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
-       xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
+       xdr_init_encode(&xdr_out, &rqstp->rq_res, p, NULL);
 
        status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
        if (status == htonl(NFS4ERR_RESOURCE))
index 885363ca8569909d96b15f8480fd4d57be1d0119..2f6b447cdd8256c393fcf1cc4580a7174e8a3055 100644 (file)
@@ -229,6 +229,8 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
        spin_lock(&delegation->lock);
        if (delegation->inode != NULL)
                inode = igrab(delegation->inode);
+       if (!inode)
+               set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
        spin_unlock(&delegation->lock);
        return inode;
 }
@@ -681,7 +683,7 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
 
 /**
  * nfs_super_return_all_delegations - return delegations for one superblock
- * @sb: sb to process
+ * @server: pointer to nfs_server to process
  *
  */
 void nfs_server_return_all_delegations(struct nfs_server *server)
@@ -944,10 +946,11 @@ restart:
        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
                list_for_each_entry_rcu(delegation, &server->delegations,
                                                                super_list) {
-                       if (test_bit(NFS_DELEGATION_RETURNING,
-                                               &delegation->flags))
-                               continue;
-                       if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
+                       if (test_bit(NFS_DELEGATION_INODE_FREEING,
+                                               &delegation->flags) ||
+                           test_bit(NFS_DELEGATION_RETURNING,
+                                               &delegation->flags) ||
+                           test_bit(NFS_DELEGATION_NEED_RECLAIM,
                                                &delegation->flags) == 0)
                                continue;
                        if (!nfs_sb_active(server->super))
@@ -1053,10 +1056,11 @@ restart:
        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
                list_for_each_entry_rcu(delegation, &server->delegations,
                                                                super_list) {
-                       if (test_bit(NFS_DELEGATION_RETURNING,
-                                               &delegation->flags))
-                               continue;
-                       if (test_bit(NFS_DELEGATION_TEST_EXPIRED,
+                       if (test_bit(NFS_DELEGATION_INODE_FREEING,
+                                               &delegation->flags) ||
+                           test_bit(NFS_DELEGATION_RETURNING,
+                                               &delegation->flags) ||
+                           test_bit(NFS_DELEGATION_TEST_EXPIRED,
                                                &delegation->flags) == 0)
                                continue;
                        if (!nfs_sb_active(server->super))
index dcbf3394ba0e0431fdbbf17486cf3e6d3bda1290..35b4b02c1ae01d04478b4f4de56c5f59b3ebc911 100644 (file)
@@ -34,6 +34,7 @@ enum {
        NFS_DELEGATION_RETURNING,
        NFS_DELEGATION_REVOKED,
        NFS_DELEGATION_TEST_EXPIRED,
+       NFS_DELEGATION_INODE_FREEING,
 };
 
 int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
index 6bf4471850c8cbe329699f3fa5eaa54c6daaa685..a71d0b42d16053f65a76a885b5fe1c38b819df1c 100644 (file)
@@ -139,12 +139,19 @@ struct nfs_cache_array {
        struct nfs_cache_array_entry array[0];
 };
 
+struct readdirvec {
+       unsigned long nr;
+       unsigned long index;
+       struct page *pages[NFS_MAX_READDIR_RAPAGES];
+};
+
 typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
 typedef struct {
        struct file     *file;
        struct page     *page;
        struct dir_context *ctx;
        unsigned long   page_index;
+       struct readdirvec pvec;
        u64             *dir_cookie;
        u64             last_cookie;
        loff_t          current_index;
@@ -524,6 +531,10 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
        struct nfs_cache_array *array;
        unsigned int count = 0;
        int status;
+       int max_rapages = NFS_MAX_READDIR_RAPAGES;
+
+       desc->pvec.index = desc->page_index;
+       desc->pvec.nr = 0;
 
        scratch = alloc_page(GFP_KERNEL);
        if (scratch == NULL)
@@ -548,20 +559,40 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
                if (desc->plus)
                        nfs_prime_dcache(file_dentry(desc->file), entry);
 
-               status = nfs_readdir_add_to_array(entry, page);
+               status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
+               if (status == -ENOSPC) {
+                       desc->pvec.nr++;
+                       if (desc->pvec.nr == max_rapages)
+                               break;
+                       status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
+               }
                if (status != 0)
                        break;
        } while (!entry->eof);
 
+       /*
+        * page and desc->pvec.pages[0] are valid, don't need to check
+        * whether or not to be NULL.
+        */
+       copy_highpage(page, desc->pvec.pages[0]);
+
 out_nopages:
        if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
-               array = kmap(page);
+               array = kmap_atomic(desc->pvec.pages[desc->pvec.nr]);
                array->eof_index = array->size;
                status = 0;
-               kunmap(page);
+               kunmap_atomic(array);
        }
 
        put_page(scratch);
+
+       /*
+        * desc->pvec.nr > 0 means at least one page was completely filled,
+        * we should return -ENOSPC. Otherwise function
+        * nfs_readdir_xdr_to_array will enter infinite loop.
+        */
+       if (desc->pvec.nr > 0)
+               return -ENOSPC;
        return status;
 }
 
@@ -574,8 +605,8 @@ void nfs_readdir_free_pages(struct page **pages, unsigned int npages)
 }
 
 /*
- * nfs_readdir_large_page will allocate pages that must be freed with a call
- * to nfs_readdir_free_pagearray
+ * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
+ * to nfs_readdir_free_pages()
  */
 static
 int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages)
@@ -595,6 +626,24 @@ out_freepages:
        return -ENOMEM;
 }
 
+/*
+ * nfs_readdir_rapages_init initialize rapages by nfs_cache_array structure.
+ */
+static
+void nfs_readdir_rapages_init(nfs_readdir_descriptor_t *desc)
+{
+       struct nfs_cache_array *array;
+       int max_rapages = NFS_MAX_READDIR_RAPAGES;
+       int index;
+
+       for (index = 0; index < max_rapages; index++) {
+               array = kmap_atomic(desc->pvec.pages[index]);
+               memset(array, 0, sizeof(struct nfs_cache_array));
+               array->eof_index = -1;
+               kunmap_atomic(array);
+       }
+}
+
 static
 int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
 {
@@ -605,6 +654,12 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
        int status = -ENOMEM;
        unsigned int array_size = ARRAY_SIZE(pages);
 
+       /*
+        * This means we hit readdir rdpages miss, the preallocated rdpages
+        * are useless, the preallocate rdpages should be reinitialized.
+        */
+       nfs_readdir_rapages_init(desc);
+
        entry.prev_cookie = 0;
        entry.cookie = desc->last_cookie;
        entry.eof = 0;
@@ -664,9 +719,24 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
        struct inode    *inode = file_inode(desc->file);
        int ret;
 
-       ret = nfs_readdir_xdr_to_array(desc, page, inode);
-       if (ret < 0)
-               goto error;
+       /*
+        * If desc->page_index in range desc->pvec.index and
+        * desc->pvec.index + desc->pvec.nr, we get readdir cache hit.
+        */
+       if (desc->page_index >= desc->pvec.index &&
+               desc->page_index < (desc->pvec.index + desc->pvec.nr)) {
+               /*
+                * page and desc->pvec.pages[x] are valid, don't need to check
+                * whether or not to be NULL.
+                */
+               copy_highpage(page, desc->pvec.pages[desc->page_index - desc->pvec.index]);
+               ret = 0;
+       } else {
+               ret = nfs_readdir_xdr_to_array(desc, page, inode);
+               if (ret < 0)
+                       goto error;
+       }
+
        SetPageUptodate(page);
 
        if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
@@ -831,6 +901,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
                        *desc = &my_desc;
        struct nfs_open_dir_context *dir_ctx = file->private_data;
        int res = 0;
+       int max_rapages = NFS_MAX_READDIR_RAPAGES;
 
        dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
                        file, (long long)ctx->pos);
@@ -850,6 +921,12 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
        desc->decode = NFS_PROTO(inode)->decode_dirent;
        desc->plus = nfs_use_readdirplus(inode, ctx);
 
+       res = nfs_readdir_alloc_pages(desc->pvec.pages, max_rapages);
+       if (res < 0)
+               return -ENOMEM;
+
+       nfs_readdir_rapages_init(desc);
+
        if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
                res = nfs_revalidate_mapping(inode, file->f_mapping);
        if (res < 0)
@@ -885,6 +962,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
                        break;
        } while (!desc->eof);
 out:
+       nfs_readdir_free_pages(desc->pvec.pages, max_rapages);
        if (res > 0)
                res = 0;
        dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
@@ -945,7 +1023,7 @@ static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
 
 /**
  * nfs_force_lookup_revalidate - Mark the directory as having changed
- * @dir - pointer to directory inode
+ * @dir: pointer to directory inode
  *
  * This forces the revalidation code in nfs_lookup_revalidate() to do a
  * full lookup on all child dentries of 'dir' whenever a change occurs
@@ -1649,7 +1727,7 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
 reval_dentry:
        if (flags & LOOKUP_RCU)
                return -ECHILD;
-       return nfs_lookup_revalidate_dentry(dir, dentry, inode);;
+       return nfs_lookup_revalidate_dentry(dir, dentry, inode);
 
 full_reval:
        return nfs_do_lookup_revalidate(dir, dentry, flags);
index 33824a0a57bfe5de9e31f4d13e4d2eebc3b7b2df..0fd811ac08b525d2a5962a168402557a1e01843b 100644 (file)
@@ -428,7 +428,7 @@ out_put:
        hdr->release(hdr);
 }
 
-static void nfs_read_sync_pgio_error(struct list_head *head)
+static void nfs_read_sync_pgio_error(struct list_head *head, int error)
 {
        struct nfs_page *req;
 
@@ -664,8 +664,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 
        list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
                if (!nfs_pageio_add_request(&desc, req)) {
-                       nfs_list_remove_request(req);
-                       nfs_list_add_request(req, &failed);
+                       nfs_list_move_request(req, &failed);
                        spin_lock(&cinfo.inode->i_lock);
                        dreq->flags = 0;
                        if (desc.pg_error < 0)
@@ -821,7 +820,7 @@ out_put:
        hdr->release(hdr);
 }
 
-static void nfs_write_sync_pgio_error(struct list_head *head)
+static void nfs_write_sync_pgio_error(struct list_head *head, int error)
 {
        struct nfs_page *req;
 
index 29553fdba8af7ec61c11a19bfaeb8be4a7b0ddb4..4899b85f9b3c2cf30de9486b4279de01999f07d2 100644 (file)
@@ -89,8 +89,8 @@ EXPORT_SYMBOL_GPL(nfs_file_release);
 
 /**
  * nfs_revalidate_size - Revalidate the file size
- * @inode - pointer to inode struct
- * @file - pointer to struct file
+ * @inode: pointer to inode struct
+ * @filp: pointer to struct file
  *
  * Revalidates the file length. This is basically a wrapper around
  * nfs_revalidate_inode() that takes into account the fact that we may
@@ -276,6 +276,12 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync);
  * then a modify/write/read cycle when writing to a page in the
  * page cache.
  *
+ * Some pNFS layout drivers can only read/write at a certain block
+ * granularity like all block devices and therefore we must perform
+ * read/modify/write whenever a page hasn't read yet and the data
+ * to be written there is not aligned to a block boundary and/or
+ * smaller than the block size.
+ *
  * The modify/write/read cycle may occur if a page is read before
  * being completely filled by the writer.  In this situation, the
  * page must be completely written to stable storage on the server
@@ -291,26 +297,32 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync);
  * and that the new data won't completely replace the old data in
  * that range of the file.
  */
-static int nfs_want_read_modify_write(struct file *file, struct page *page,
-                       loff_t pos, unsigned len)
+static bool nfs_full_page_write(struct page *page, loff_t pos, unsigned int len)
 {
        unsigned int pglen = nfs_page_length(page);
        unsigned int offset = pos & (PAGE_SIZE - 1);
        unsigned int end = offset + len;
 
-       if (pnfs_ld_read_whole_page(file->f_mapping->host)) {
-               if (!PageUptodate(page))
-                       return 1;
-               return 0;
-       }
+       return !pglen || (end >= pglen && !offset);
+}
 
-       if ((file->f_mode & FMODE_READ) &&      /* open for read? */
-           !PageUptodate(page) &&              /* Uptodate? */
-           !PagePrivate(page) &&               /* i/o request already? */
-           pglen &&                            /* valid bytes of file? */
-           (end < pglen || offset))            /* replace all valid bytes? */
-               return 1;
-       return 0;
+static bool nfs_want_read_modify_write(struct file *file, struct page *page,
+                       loff_t pos, unsigned int len)
+{
+       /*
+        * Up-to-date pages, those with ongoing or full-page write
+        * don't need read/modify/write
+        */
+       if (PageUptodate(page) || PagePrivate(page) ||
+           nfs_full_page_write(page, pos, len))
+               return false;
+
+       if (pnfs_ld_read_whole_page(file->f_mapping->host))
+               return true;
+       /* Open for reading too? */
+       if (file->f_mode & FMODE_READ)
+               return true;
+       return false;
 }
 
 /*
index 63abe705f4cabe133d28e94236039e7a0c69887d..f9264e1922a28b836367b145c215d9ceb8883843 100644 (file)
@@ -410,7 +410,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
        for (i = 0; i < fls->mirror_array_cnt; i++) {
                struct nfs4_ff_layout_mirror *mirror;
                struct cred *kcred;
-               const struct cred *cred;
+               const struct cred __rcu *cred;
                kuid_t uid;
                kgid_t gid;
                u32 ds_count, fh_count, id;
@@ -501,7 +501,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
                        goto out_err_free;
                kcred->fsuid = uid;
                kcred->fsgid = gid;
-               cred = kcred;
+               cred = RCU_INITIALIZER(kcred);
 
                if (lgr->range.iomode == IOMODE_READ)
                        rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
@@ -788,30 +788,82 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
        }
 }
 
+static void
+ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx)
+{
+       struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+
+       if (devid)
+               nfs4_mark_deviceid_unavailable(devid);
+}
+
+static void
+ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx)
+{
+       struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+
+       if (devid)
+               nfs4_mark_deviceid_available(devid);
+}
+
 static struct nfs4_pnfs_ds *
-ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
-                                 int start_idx,
-                                 int *best_idx)
+ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
+                            int start_idx, int *best_idx,
+                            bool check_device)
 {
        struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
+       struct nfs4_ff_layout_mirror *mirror;
        struct nfs4_pnfs_ds *ds;
        bool fail_return = false;
        int idx;
 
-       /* mirrors are sorted by efficiency */
+       /* mirrors are initially sorted by efficiency */
        for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
                if (idx+1 == fls->mirror_array_cnt)
-                       fail_return = true;
-               ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return);
-               if (ds) {
-                       *best_idx = idx;
-                       return ds;
-               }
+                       fail_return = !check_device;
+
+               mirror = FF_LAYOUT_COMP(lseg, idx);
+               ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return);
+               if (!ds)
+                       continue;
+
+               if (check_device &&
+                   nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node))
+                       continue;
+
+               *best_idx = idx;
+               return ds;
        }
 
        return NULL;
 }
 
+static struct nfs4_pnfs_ds *
+ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
+                                int start_idx, int *best_idx)
+{
+       return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
+}
+
+static struct nfs4_pnfs_ds *
+ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
+                                  int start_idx, int *best_idx)
+{
+       return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
+}
+
+static struct nfs4_pnfs_ds *
+ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
+                                 int start_idx, int *best_idx)
+{
+       struct nfs4_pnfs_ds *ds;
+
+       ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx);
+       if (ds)
+               return ds;
+       return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
+}
+
 static void
 ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
                      struct nfs_page *req,
@@ -925,7 +977,8 @@ retry:
                goto out_mds;
 
        for (i = 0; i < pgio->pg_mirror_count; i++) {
-               ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
+               mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
+               ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
                if (!ds) {
                        if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
                                goto out_mds;
@@ -936,7 +989,6 @@ retry:
                        goto retry;
                }
                pgm = &pgio->pg_mirrors[i];
-               mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
                pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
        }
 
@@ -1071,6 +1123,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
                break;
        case -NFS4ERR_RETRY_UNCACHED_REP:
                break;
+       case -EAGAIN:
+               return -NFS4ERR_RESET_TO_PNFS;
        /* Invalidate Layout errors */
        case -NFS4ERR_PNFS_NO_LAYOUT:
        case -ESTALE:           /* mapped NFS4ERR_STALE */
@@ -1131,6 +1185,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
        case -EBADHANDLE:
        case -ELOOP:
        case -ENOSPC:
+       case -EAGAIN:
                break;
        case -EJUKEBOX:
                nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
@@ -1158,8 +1213,10 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
 {
        int vers = clp->cl_nfs_mod->rpc_vers->number;
 
-       if (task->tk_status >= 0)
+       if (task->tk_status >= 0) {
+               ff_layout_mark_ds_reachable(lseg, idx);
                return 0;
+       }
 
        /* Handle the case of an invalid layout segment */
        if (!pnfs_is_valid_lseg(lseg))
@@ -1222,6 +1279,8 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
        err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
                                       mirror, offset, length, status, opnum,
                                       GFP_NOIO);
+       if (status == NFS4ERR_NXIO)
+               ff_layout_mark_ds_unreachable(lseg, idx);
        pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
        dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status);
 }
@@ -1249,7 +1308,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
                if (ff_layout_choose_best_ds_for_read(hdr->lseg,
                                        hdr->pgio_mirror_idx + 1,
                                        &hdr->pgio_mirror_idx))
-                       goto out_eagain;
+                       goto out_layouterror;
                set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
                return task->tk_status;
        case -NFS4ERR_RESET_TO_MDS:
@@ -1260,6 +1319,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
        }
 
        return 0;
+out_layouterror:
+       ff_layout_send_layouterror(hdr->lseg);
 out_eagain:
        rpc_restart_call_prepare(task);
        return -EAGAIN;
@@ -1293,15 +1354,6 @@ ff_layout_set_layoutcommit(struct inode *inode,
                (unsigned long long) NFS_I(inode)->layout->plh_lwb);
 }
 
-static bool
-ff_layout_device_unavailable(struct pnfs_layout_segment *lseg, int idx)
-{
-       /* No mirroring for now */
-       struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx);
-
-       return ff_layout_test_devid_unavailable(node);
-}
-
 static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
                struct nfs_pgio_header *hdr)
 {
@@ -1332,10 +1384,6 @@ static int ff_layout_read_prepare_common(struct rpc_task *task,
                rpc_exit(task, -EIO);
                return -EIO;
        }
-       if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) {
-               rpc_exit(task, -EHOSTDOWN);
-               return -EAGAIN;
-       }
 
        ff_layout_read_record_layoutstats_start(task, hdr);
        return 0;
@@ -1369,6 +1417,16 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
        ff_layout_read_prepare_common(task, hdr);
 }
 
+static void
+ff_layout_io_prepare_transmit(struct rpc_task *task,
+               void *data)
+{
+       struct nfs_pgio_header *hdr = data;
+
+       if (!pnfs_is_valid_lseg(hdr->lseg))
+               rpc_exit(task, -EAGAIN);
+}
+
 static void ff_layout_read_call_done(struct rpc_task *task, void *data)
 {
        struct nfs_pgio_header *hdr = data;
@@ -1399,9 +1457,10 @@ static void ff_layout_read_release(void *data)
        struct nfs_pgio_header *hdr = data;
 
        ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
-       if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
+       if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
+               ff_layout_send_layouterror(hdr->lseg);
                pnfs_read_resend_pnfs(hdr);
-       else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
+       } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
                ff_layout_reset_read(hdr);
        pnfs_generic_rw_release(data);
 }
@@ -1513,11 +1572,6 @@ static int ff_layout_write_prepare_common(struct rpc_task *task,
                return -EIO;
        }
 
-       if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) {
-               rpc_exit(task, -EHOSTDOWN);
-               return -EAGAIN;
-       }
-
        ff_layout_write_record_layoutstats_start(task, hdr);
        return 0;
 }
@@ -1573,9 +1627,10 @@ static void ff_layout_write_release(void *data)
        struct nfs_pgio_header *hdr = data;
 
        ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
-       if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
+       if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) {
+               ff_layout_send_layouterror(hdr->lseg);
                ff_layout_reset_write(hdr, true);
-       else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
+       } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
                ff_layout_reset_write(hdr, false);
        pnfs_generic_rw_release(data);
 }
@@ -1657,6 +1712,7 @@ static void ff_layout_commit_release(void *data)
 
 static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
        .rpc_call_prepare = ff_layout_read_prepare_v3,
+       .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
        .rpc_call_done = ff_layout_read_call_done,
        .rpc_count_stats = ff_layout_read_count_stats,
        .rpc_release = ff_layout_read_release,
@@ -1664,6 +1720,7 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
 
 static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
        .rpc_call_prepare = ff_layout_read_prepare_v4,
+       .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
        .rpc_call_done = ff_layout_read_call_done,
        .rpc_count_stats = ff_layout_read_count_stats,
        .rpc_release = ff_layout_read_release,
@@ -1671,6 +1728,7 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
 
 static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
        .rpc_call_prepare = ff_layout_write_prepare_v3,
+       .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
        .rpc_call_done = ff_layout_write_call_done,
        .rpc_count_stats = ff_layout_write_count_stats,
        .rpc_release = ff_layout_write_release,
@@ -1678,6 +1736,7 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
 
 static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
        .rpc_call_prepare = ff_layout_write_prepare_v4,
+       .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
        .rpc_call_done = ff_layout_write_call_done,
        .rpc_count_stats = ff_layout_write_count_stats,
        .rpc_release = ff_layout_write_release,
@@ -1703,6 +1762,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
        struct pnfs_layout_segment *lseg = hdr->lseg;
        struct nfs4_pnfs_ds *ds;
        struct rpc_clnt *ds_clnt;
+       struct nfs4_ff_layout_mirror *mirror;
        const struct cred *ds_cred;
        loff_t offset = hdr->args.offset;
        u32 idx = hdr->pgio_mirror_idx;
@@ -1713,20 +1773,21 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
                __func__, hdr->inode->i_ino,
                hdr->args.pgbase, (size_t)hdr->args.count, offset);
 
-       ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
+       mirror = FF_LAYOUT_COMP(lseg, idx);
+       ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
        if (!ds)
                goto out_failed;
 
-       ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
+       ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
                                                   hdr->inode);
        if (IS_ERR(ds_clnt))
                goto out_failed;
 
-       ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
+       ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
        if (!ds_cred)
                goto out_failed;
 
-       vers = nfs4_ff_layout_ds_version(lseg, idx);
+       vers = nfs4_ff_layout_ds_version(mirror);
 
        dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
                ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);
@@ -1734,13 +1795,11 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
        hdr->pgio_done_cb = ff_layout_read_done_cb;
        refcount_inc(&ds->ds_clp->cl_count);
        hdr->ds_clp = ds->ds_clp;
-       fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
+       fh = nfs4_ff_layout_select_ds_fh(mirror);
        if (fh)
                hdr->args.fh = fh;
 
-       if (vers == 4 &&
-               !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
-               goto out_failed;
+       nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);
 
        /*
         * Note that if we ever decide to split across DSes,
@@ -1770,26 +1829,28 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
        struct pnfs_layout_segment *lseg = hdr->lseg;
        struct nfs4_pnfs_ds *ds;
        struct rpc_clnt *ds_clnt;
+       struct nfs4_ff_layout_mirror *mirror;
        const struct cred *ds_cred;
        loff_t offset = hdr->args.offset;
        int vers;
        struct nfs_fh *fh;
        int idx = hdr->pgio_mirror_idx;
 
-       ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
+       mirror = FF_LAYOUT_COMP(lseg, idx);
+       ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
        if (!ds)
                goto out_failed;
 
-       ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
+       ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
                                                   hdr->inode);
        if (IS_ERR(ds_clnt))
                goto out_failed;
 
-       ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
+       ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
        if (!ds_cred)
                goto out_failed;
 
-       vers = nfs4_ff_layout_ds_version(lseg, idx);
+       vers = nfs4_ff_layout_ds_version(mirror);
 
        dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
                __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
@@ -1800,13 +1861,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
        refcount_inc(&ds->ds_clp->cl_count);
        hdr->ds_clp = ds->ds_clp;
        hdr->ds_commit_idx = idx;
-       fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
+       fh = nfs4_ff_layout_select_ds_fh(mirror);
        if (fh)
                hdr->args.fh = fh;
 
-       if (vers == 4 &&
-               !nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
-               goto out_failed;
+       nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);
 
        /*
         * Note that if we ever decide to split across DSes,
@@ -1849,6 +1908,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
        struct pnfs_layout_segment *lseg = data->lseg;
        struct nfs4_pnfs_ds *ds;
        struct rpc_clnt *ds_clnt;
+       struct nfs4_ff_layout_mirror *mirror;
        const struct cred *ds_cred;
        u32 idx;
        int vers, ret;
@@ -1859,20 +1919,21 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
                goto out_err;
 
        idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
-       ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
+       mirror = FF_LAYOUT_COMP(lseg, idx);
+       ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
        if (!ds)
                goto out_err;
 
-       ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
+       ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
                                                   data->inode);
        if (IS_ERR(ds_clnt))
                goto out_err;
 
-       ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred);
+       ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred);
        if (!ds_cred)
                goto out_err;
 
-       vers = nfs4_ff_layout_ds_version(lseg, idx);
+       vers = nfs4_ff_layout_ds_version(mirror);
 
        dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
                data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count),
@@ -2036,7 +2097,7 @@ ff_layout_encode_layoutreturn(struct xdr_stream *xdr,
 
        dprintk("%s: Begin\n", __func__);
 
-       xdr_init_encode(&tmp_xdr, &tmp_buf, NULL);
+       xdr_init_encode(&tmp_xdr, &tmp_buf, NULL, NULL);
 
        ff_layout_encode_ioerr(&tmp_xdr, args, ff_args);
        ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args);
@@ -2102,6 +2163,52 @@ out_nomem:
        return -ENOMEM;
 }
 
+#ifdef CONFIG_NFS_V4_2
+void
+ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
+{
+       struct pnfs_layout_hdr *lo = lseg->pls_layout;
+       struct nfs42_layout_error *errors;
+       LIST_HEAD(head);
+
+       if (!nfs_server_capable(lo->plh_inode, NFS_CAP_LAYOUTERROR))
+               return;
+       ff_layout_fetch_ds_ioerr(lo, &lseg->pls_range, &head, -1);
+       if (list_empty(&head))
+               return;
+
+       errors = kmalloc_array(NFS42_LAYOUTERROR_MAX,
+                       sizeof(*errors), GFP_NOFS);
+       if (errors != NULL) {
+               const struct nfs4_ff_layout_ds_err *pos;
+               size_t n = 0;
+
+               list_for_each_entry(pos, &head, list) {
+                       errors[n].offset = pos->offset;
+                       errors[n].length = pos->length;
+                       nfs4_stateid_copy(&errors[n].stateid, &pos->stateid);
+                       errors[n].errors[0].dev_id = pos->deviceid;
+                       errors[n].errors[0].status = pos->status;
+                       errors[n].errors[0].opnum = pos->opnum;
+                       n++;
+                       if (!list_is_last(&pos->list, &head) &&
+                           n < NFS42_LAYOUTERROR_MAX)
+                               continue;
+                       if (nfs42_proc_layouterror(lseg, errors, n) < 0)
+                               break;
+                       n = 0;
+               }
+               kfree(errors);
+       }
+       ff_layout_free_ds_ioerr(&head);
+}
+#else
+void
+ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
+{
+}
+#endif
+
 static int
 ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)
 {
index c2626bad466b22addbdf404ab462ec4df8c1f5f7..2f369966abf7179c625f5d046e52cbfa1fc80eb0 100644 (file)
@@ -132,16 +132,6 @@ FF_LAYOUT_LSEG(struct pnfs_layout_segment *lseg)
                            generic_hdr);
 }
 
-static inline struct nfs4_deviceid_node *
-FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx)
-{
-       if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt ||
-           FF_LAYOUT_LSEG(lseg)->mirror_array[idx] == NULL ||
-           FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds == NULL)
-               return NULL;
-       return &FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds->id_node;
-}
-
 static inline struct nfs4_ff_layout_ds *
 FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node)
 {
@@ -151,9 +141,25 @@ FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node)
 static inline struct nfs4_ff_layout_mirror *
 FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx)
 {
-       if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt)
-               return NULL;
-       return FF_LAYOUT_LSEG(lseg)->mirror_array[idx];
+       struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
+
+       if (idx < fls->mirror_array_cnt)
+               return fls->mirror_array[idx];
+       return NULL;
+}
+
+static inline struct nfs4_deviceid_node *
+FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx)
+{
+       struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, idx);
+
+       if (mirror != NULL) {
+               struct nfs4_ff_layout_ds *mirror_ds = mirror->mirror_ds;
+
+               if (!IS_ERR_OR_NULL(mirror_ds))
+                       return &mirror_ds->id_node;
+       }
+       return NULL;
 }
 
 static inline u32
@@ -174,28 +180,10 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg)
        return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_READ_IO;
 }
 
-static inline bool
-ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
-{
-       /*
-        * Flexfiles should never mark a DS unavailable, but if it does
-        * print a (ratelimited) warning as this can affect performance.
-        */
-       if (nfs4_test_deviceid_unavailable(node)) {
-               u32 *p = (u32 *)node->deviceid.data;
-
-               pr_warn_ratelimited("NFS: flexfiles layout referencing an "
-                               "unavailable device [%x%x%x%x]\n",
-                               p[0], p[1], p[2], p[3]);
-               return true;
-       }
-       return false;
-}
-
 static inline int
-nfs4_ff_layout_ds_version(struct pnfs_layout_segment *lseg, u32 ds_idx)
+nfs4_ff_layout_ds_version(const struct nfs4_ff_layout_mirror *mirror)
 {
-       return FF_LAYOUT_COMP(lseg, ds_idx)->mirror_ds->ds_versions[0].version;
+       return mirror->mirror_ds->ds_versions[0].version;
 }
 
 struct nfs4_ff_layout_ds *
@@ -207,6 +195,7 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
                             struct nfs4_ff_layout_mirror *mirror, u64 offset,
                             u64 length, int status, enum nfs_opnum4 opnum,
                             gfp_t gfp_flags);
+void ff_layout_send_layouterror(struct pnfs_layout_segment *lseg);
 int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head);
 void ff_layout_free_ds_ioerr(struct list_head *head);
 unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
@@ -214,23 +203,23 @@ unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
                struct list_head *head,
                unsigned int maxnum);
 struct nfs_fh *
-nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx);
-int
-nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
-                               u32 mirror_idx,
-                               nfs4_stateid *stateid);
+nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror);
+void
+nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror,
+               nfs4_stateid *stateid);
 
 struct nfs4_pnfs_ds *
-nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
+nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
+                         struct nfs4_ff_layout_mirror *mirror,
                          bool fail_return);
 
 struct rpc_clnt *
-nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg,
-                                u32 ds_idx,
+nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror,
                                 struct nfs_client *ds_clp,
                                 struct inode *inode);
-const struct cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg,
-                                      u32 ds_idx, const struct cred *mdscred);
+const struct cred *ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror,
+                                        const struct pnfs_layout_range *range,
+                                        const struct cred *mdscred);
 bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg);
 bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg);
 
index 11766a74216d5a84509e0ad27dba0de7c8166560..a809989807d681474fb658de8ec594a6c4586374 100644 (file)
@@ -183,56 +183,6 @@ out_err:
        return NULL;
 }
 
-static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg,
-               struct nfs4_deviceid_node *devid)
-{
-       nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid);
-       if (!ff_layout_has_available_ds(lseg))
-               pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode,
-                               lseg);
-}
-
-static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg,
-                                  struct nfs4_ff_layout_mirror *mirror,
-                                  bool create)
-{
-       if (mirror == NULL || IS_ERR(mirror->mirror_ds))
-               goto outerr;
-       if (mirror->mirror_ds == NULL) {
-               if (create) {
-                       struct nfs4_deviceid_node *node;
-                       struct pnfs_layout_hdr *lh = lseg->pls_layout;
-                       struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV);
-
-                       node = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode),
-                                       &mirror->devid, lh->plh_lc_cred,
-                                       GFP_KERNEL);
-                       if (node)
-                               mirror_ds = FF_LAYOUT_MIRROR_DS(node);
-
-                       /* check for race with another call to this function */
-                       if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) &&
-                           mirror_ds != ERR_PTR(-ENODEV))
-                               nfs4_put_deviceid_node(node);
-               } else
-                       goto outerr;
-       }
-
-       if (IS_ERR(mirror->mirror_ds))
-               goto outerr;
-
-       if (mirror->mirror_ds->ds == NULL) {
-               struct nfs4_deviceid_node *devid;
-               devid = &mirror->mirror_ds->id_node;
-               ff_layout_mark_devid_invalid(lseg, devid);
-               return false;
-       }
-       return true;
-outerr:
-       pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
-       return false;
-}
-
 static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
                            u64 offset, u64 length)
 {
@@ -326,7 +276,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
        spin_lock(&flo->generic_hdr.plh_inode->i_lock);
        ff_layout_add_ds_error_locked(flo, dserr);
        spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
-
        return 0;
 }
 
@@ -353,46 +302,54 @@ ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
 }
 
 struct nfs_fh *
-nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx)
+nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror)
 {
-       struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);
-       struct nfs_fh *fh = NULL;
-
-       if (!ff_layout_mirror_valid(lseg, mirror, false)) {
-               pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n",
-                       __func__, mirror_idx);
-               goto out;
-       }
-
        /* FIXME: For now assume there is only 1 version available for the DS */
-       fh = &mirror->fh_versions[0];
-out:
-       return fh;
+       return &mirror->fh_versions[0];
 }
 
-int
-nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
-                               u32 mirror_idx,
-                               nfs4_stateid *stateid)
+void
+nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror,
+               nfs4_stateid *stateid)
 {
-       struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);
+       if (nfs4_ff_layout_ds_version(mirror) == 4)
+               nfs4_stateid_copy(stateid, &mirror->stateid);
+}
 
-       if (!ff_layout_mirror_valid(lseg, mirror, false)) {
-               pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n",
-                       __func__, mirror_idx);
-               goto out;
+static bool
+ff_layout_init_mirror_ds(struct pnfs_layout_hdr *lo,
+                        struct nfs4_ff_layout_mirror *mirror)
+{
+       if (mirror == NULL)
+               goto outerr;
+       if (mirror->mirror_ds == NULL) {
+               struct nfs4_deviceid_node *node;
+               struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV);
+
+               node = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode),
+                               &mirror->devid, lo->plh_lc_cred,
+                               GFP_KERNEL);
+               if (node)
+                       mirror_ds = FF_LAYOUT_MIRROR_DS(node);
+
+               /* check for race with another call to this function */
+               if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) &&
+                   mirror_ds != ERR_PTR(-ENODEV))
+                       nfs4_put_deviceid_node(node);
        }
 
-       nfs4_stateid_copy(stateid, &mirror->stateid);
-       return 1;
-out:
-       return 0;
+       if (IS_ERR(mirror->mirror_ds))
+               goto outerr;
+
+       return true;
+outerr:
+       return false;
 }
 
 /**
  * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
  * @lseg: the layout segment we're operating on
- * @ds_idx: index of the DS to use
+ * @mirror: layout mirror describing the DS to use
  * @fail_return: return layout on connect failure?
  *
  * Try to prepare a DS connection to accept an RPC call. This involves
@@ -407,26 +364,18 @@ out:
  * Returns a pointer to a connected DS object on success or NULL on failure.
  */
 struct nfs4_pnfs_ds *
-nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
+nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
+                         struct nfs4_ff_layout_mirror *mirror,
                          bool fail_return)
 {
-       struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
        struct nfs4_pnfs_ds *ds = NULL;
-       struct nfs4_deviceid_node *devid;
        struct inode *ino = lseg->pls_layout->plh_inode;
        struct nfs_server *s = NFS_SERVER(ino);
        unsigned int max_payload;
        int status;
 
-       if (!ff_layout_mirror_valid(lseg, mirror, true)) {
-               pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
-                       __func__, ds_idx);
-               goto out;
-       }
-
-       devid = &mirror->mirror_ds->id_node;
-       if (ff_layout_test_devid_unavailable(devid))
-               goto out_fail;
+       if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror))
+               goto noconnect;
 
        ds = mirror->mirror_ds->ds;
        /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
@@ -437,8 +386,8 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
        /* FIXME: For now we assume the server sent only one version of NFS
         * to use for the DS.
         */
-       status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
-                            dataserver_retrans,
+       status = nfs4_pnfs_ds_connect(s, ds, &mirror->mirror_ds->id_node,
+                            dataserver_timeo, dataserver_retrans,
                             mirror->mirror_ds->ds_versions[0].version,
                             mirror->mirror_ds->ds_versions[0].minor_version);
 
@@ -453,11 +402,12 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
                        mirror->mirror_ds->ds_versions[0].wsize = max_payload;
                goto out;
        }
-out_fail:
+noconnect:
        ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
                                 mirror, lseg->pls_range.offset,
                                 lseg->pls_range.length, NFS4ERR_NXIO,
                                 OP_ILLEGAL, GFP_NOIO);
+       ff_layout_send_layouterror(lseg);
        if (fail_return || !ff_layout_has_available_ds(lseg))
                pnfs_error_mark_layout_for_return(ino, lseg);
        ds = NULL;
@@ -466,14 +416,14 @@ out:
 }
 
 const struct cred *
-ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
+ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror,
+                     const struct pnfs_layout_range *range,
                      const struct cred *mdscred)
 {
-       struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
        const struct cred *cred;
 
        if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) {
-               cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode);
+               cred = ff_layout_get_mirror_cred(mirror, range->iomode);
                if (!cred)
                        cred = get_cred(mdscred);
        } else {
@@ -483,15 +433,18 @@ ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
 }
 
 /**
-* Find or create a DS rpc client with th MDS server rpc client auth flavor
-* in the nfs_client cl_ds_clients list.
-*/
+ * nfs4_ff_find_or_create_ds_client - Find or create a DS rpc client
+ * @mirror: pointer to the mirror
+ * @ds_clp: nfs_client for the DS
+ * @inode: pointer to inode
+ *
+ * Find or create a DS rpc client with the MDS server rpc client auth flavor
+ * in the nfs_client cl_ds_clients list.
+ */
 struct rpc_clnt *
-nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx,
+nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror,
                                 struct nfs_client *ds_clp, struct inode *inode)
 {
-       struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
-
        switch (mirror->mirror_ds->ds_versions[0].version) {
        case 3:
                /* For NFSv3 DS, flavor is set when creating DS connections */
@@ -608,7 +561,7 @@ static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg)
                        if (IS_ERR(mirror->mirror_ds))
                                continue;
                        devid = &mirror->mirror_ds->id_node;
-                       if (!ff_layout_test_devid_unavailable(devid))
+                       if (!nfs4_test_deviceid_unavailable(devid))
                                return true;
                }
        }
@@ -629,7 +582,7 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg)
                if (!mirror->mirror_ds)
                        continue;
                devid = &mirror->mirror_ds->id_node;
-               if (ff_layout_test_devid_unavailable(devid))
+               if (nfs4_test_deviceid_unavailable(devid))
                        return false;
        }
 
index 094775ea0781e302facf6da6bab1c43188de2df5..414a90d48493d99aab7492b9b7932f838675c90c 100644 (file)
@@ -143,6 +143,7 @@ EXPORT_SYMBOL_GPL(nfs_sync_inode);
 
 /**
  * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
+ * @mapping: pointer to struct address_space
  */
 int nfs_sync_mapping(struct address_space *mapping)
 {
@@ -1184,8 +1185,8 @@ int nfs_attribute_cache_expired(struct inode *inode)
 
 /**
  * nfs_revalidate_inode - Revalidate the inode attributes
- * @server - pointer to nfs_server struct
- * @inode - pointer to inode struct
+ * @server: pointer to nfs_server struct
+ * @inode: pointer to inode struct
  *
  * Updates inode attribute information by retrieving the data from the server.
  */
@@ -1255,8 +1256,8 @@ out:
 
 /**
  * nfs_revalidate_mapping - Revalidate the pagecache
- * @inode - pointer to host inode
- * @mapping - pointer to mapping
+ * @inode: pointer to host inode
+ * @mapping: pointer to mapping
  */
 int nfs_revalidate_mapping(struct inode *inode,
                struct address_space *mapping)
@@ -1371,8 +1372,8 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 
 /**
  * nfs_check_inode_attributes - verify consistency of the inode attribute cache
- * @inode - pointer to inode
- * @fattr - updated attributes
+ * @inode: pointer to inode
+ * @fattr: updated attributes
  *
  * Verifies the attribute cache. If we have just changed the attributes,
  * so that fattr carries weak cache consistency data, then it may
@@ -1572,8 +1573,8 @@ EXPORT_SYMBOL_GPL(_nfs_display_fhandle);
 
 /**
  * nfs_inode_attrs_need_update - check if the inode attributes need updating
- * @inode - pointer to inode
- * @fattr - attributes
+ * @inode: pointer to inode
+ * @fattr: attributes
  *
  * Attempt to divine whether or not an RPC call reply carrying stale
  * attributes got scheduled after another call carrying updated ones.
@@ -1614,8 +1615,8 @@ static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr
 
 /**
  * nfs_refresh_inode - try to update the inode attribute cache
- * @inode - pointer to inode
- * @fattr - updated attributes
+ * @inode: pointer to inode
+ * @fattr: updated attributes
  *
  * Check that an RPC call that returned attributes has not overlapped with
  * other recent updates of the inode metadata, then decide whether it is
@@ -1649,8 +1650,8 @@ static int nfs_post_op_update_inode_locked(struct inode *inode,
 
 /**
  * nfs_post_op_update_inode - try to update the inode attribute cache
- * @inode - pointer to inode
- * @fattr - updated attributes
+ * @inode: pointer to inode
+ * @fattr: updated attributes
  *
  * After an operation that has changed the inode metadata, mark the
  * attribute cache as being invalid, then try to update it.
@@ -1679,8 +1680,8 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
 
 /**
  * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache
- * @inode - pointer to inode
- * @fattr - updated attributes
+ * @inode: pointer to inode
+ * @fattr: updated attributes
  *
  * After an operation that has changed the inode metadata, mark the
  * attribute cache as being invalid, then try to update it. Fake up
@@ -1731,8 +1732,8 @@ out_noforce:
 
 /**
  * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
- * @inode - pointer to inode
- * @fattr - updated attributes
+ * @inode: pointer to inode
+ * @fattr: updated attributes
  *
  * After an operation that has changed the inode metadata, mark the
  * attribute cache as being invalid, then try to update it. Fake up
index b1e577302518f404e9914f1f1d9f539b1e30194c..c7cf23ae6597bceaac9b7e427356807200d49365 100644 (file)
@@ -69,7 +69,8 @@ struct nfs_clone_mount {
  * Maximum number of pages that readdir can use for creating
  * a vmapped array of pages.
  */
-#define NFS_MAX_READDIR_PAGES 8
+#define NFS_MAX_READDIR_PAGES 64
+#define NFS_MAX_READDIR_RAPAGES 8
 
 struct nfs_client_initdata {
        unsigned long init_flags;
@@ -755,6 +756,7 @@ static inline bool nfs_error_is_fatal(int err)
 {
        switch (err) {
        case -ERESTARTSYS:
+       case -EINTR:
        case -EACCES:
        case -EDQUOT:
        case -EFBIG:
@@ -763,6 +765,7 @@ static inline bool nfs_error_is_fatal(int err)
        case -EROFS:
        case -ESTALE:
        case -E2BIG:
+       case -ENOMEM:
                return true;
        default:
                return false;
index 9034b4926909a91a86e7cbf95cd227178f953270..5088fda9b453bd5ff444373d49997e9a7da75a42 100644 (file)
@@ -25,7 +25,7 @@ static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode)
 
 /**
  * nfs_start_io_read - declare the file is being used for buffered reads
- * @inode - file inode
+ * @inode: file inode
  *
  * Declare that a buffered read operation is about to start, and ensure
  * that we block all direct I/O.
@@ -56,7 +56,7 @@ nfs_start_io_read(struct inode *inode)
 
 /**
  * nfs_end_io_read - declare that the buffered read operation is done
- * @inode - file inode
+ * @inode: file inode
  *
  * Declare that a buffered read operation is done, and release the shared
  * lock on inode->i_rwsem.
@@ -69,7 +69,7 @@ nfs_end_io_read(struct inode *inode)
 
 /**
  * nfs_start_io_write - declare the file is being used for buffered writes
- * @inode - file inode
+ * @inode: file inode
  *
  * Declare that a buffered read operation is about to start, and ensure
  * that we block all direct I/O.
@@ -83,7 +83,7 @@ nfs_start_io_write(struct inode *inode)
 
 /**
  * nfs_end_io_write - declare that the buffered write operation is done
- * @inode - file inode
+ * @inode: file inode
  *
  * Declare that a buffered write operation is done, and release the
  * lock on inode->i_rwsem.
@@ -105,7 +105,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
 
 /**
  * nfs_end_io_direct - declare the file is being used for direct i/o
- * @inode - file inode
+ * @inode: file inode
  *
  * Declare that a direct I/O operation is about to start, and ensure
  * that we block all buffered I/O.
@@ -136,7 +136,7 @@ nfs_start_io_direct(struct inode *inode)
 
 /**
  * nfs_end_io_direct - declare that the direct i/o operation is done
- * @inode - file inode
+ * @inode: file inode
  *
  * Declare that a direct I/O operation is done, and release the shared
  * lock on inode->i_rwsem.
index e5686be67be8d361a32344e3aaaae235d739ffd7..15f099a24c29b8fb06c073085e250e0526ed570a 100644 (file)
@@ -221,10 +221,10 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
 
 /**
  * nfs_do_submount - set up mountpoint when crossing a filesystem boundary
- * @dentry - parent directory
- * @fh - filehandle for new root dentry
- * @fattr - attributes for new root inode
- * @authflavor - security flavor to use when performing the mount
+ * @dentry: parent directory
+ * @fh: filehandle for new root dentry
+ * @fattr: attributes for new root inode
+ * @authflavor: security flavor to use when performing the mount
  *
  */
 struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
index 350675e3ed479e11af839fcbbc9b99e1d972b068..a7ed29de0a406274390e3284701b46ef1d389e64 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/nfs.h>
 #include <linux/nfs2.h>
 #include <linux/nfs_fs.h>
+#include "nfstrace.h"
 #include "internal.h"
 
 #define NFSDBG_FACILITY                NFSDBG_XDR
 
 #define NFS_attrstat_sz                (1+NFS_fattr_sz)
 #define NFS_diropres_sz                (1+NFS_fhandle_sz+NFS_fattr_sz)
-#define NFS_readlinkres_sz     (2)
-#define NFS_readres_sz         (1+NFS_fattr_sz+1)
+#define NFS_readlinkres_sz     (2+1)
+#define NFS_readres_sz         (1+NFS_fattr_sz+1+1)
 #define NFS_writeres_sz         (NFS_attrstat_sz)
 #define NFS_stat_sz            (1)
-#define NFS_readdirres_sz      (1)
+#define NFS_readdirres_sz      (1+1)
 #define NFS_statfsres_sz       (1+NFS_info_sz)
 
 static int nfs_stat_to_errno(enum nfs_stat);
 
-/*
- * While encoding arguments, set up the reply buffer in advance to
- * receive reply data directly into the page cache.
- */
-static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
-                                unsigned int base, unsigned int len,
-                                unsigned int bufsize)
-{
-       struct rpc_auth *auth = req->rq_cred->cr_auth;
-       unsigned int replen;
-
-       replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
-       xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
-}
-
-/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
-       dprintk("NFS: %s prematurely hit the end of our receive buffer. "
-               "Remaining buffer length is %tu words.\n",
-               func, xdr->end - xdr->p);
-}
-
-
 /*
  * Encode/decode NFSv2 basic data types
  *
@@ -110,8 +85,8 @@ static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        count = be32_to_cpup(p);
        recvd = xdr_read_pages(xdr, count);
        if (unlikely(count > recvd))
@@ -125,9 +100,6 @@ out_cheating:
                "count %u > recvd %u\n", count, recvd);
        count = recvd;
        goto out;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -157,13 +129,16 @@ static int decode_stat(struct xdr_stream *xdr, enum nfs_stat *status)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
+       if (unlikely(*p != cpu_to_be32(NFS_OK)))
+               goto out_status;
+       *status = 0;
+       return 0;
+out_status:
        *status = be32_to_cpup(p);
+       trace_nfs_xdr_status((int)*status);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -205,14 +180,11 @@ static int decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, NFS2_FHSIZE);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        fh->size = NFS2_FHSIZE;
        memcpy(fh->data, p, NFS2_FHSIZE);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -282,8 +254,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, NFS_fattr_sz << 2);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
 
        fattr->valid |= NFS_ATTR_FATTR_V2;
 
@@ -325,9 +297,6 @@ out_uid:
 out_gid:
        dprintk("NFS: returned invalid gid\n");
        return -EINVAL;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -416,23 +385,20 @@ static int decode_filename_inline(struct xdr_stream *xdr,
        u32 count;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        count = be32_to_cpup(p);
        if (count > NFS3_MAXNAMLEN)
                goto out_nametoolong;
        p = xdr_inline_decode(xdr, count);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        *name = (const char *)p;
        *length = count;
        return 0;
 out_nametoolong:
        dprintk("NFS: returned filename too long: %u\n", count);
        return -ENAMETOOLONG;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -455,8 +421,8 @@ static int decode_path(struct xdr_stream *xdr)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        length = be32_to_cpup(p);
        if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN))
                goto out_size;
@@ -472,9 +438,6 @@ out_cheating:
        dprintk("NFS: server cheating in pathname result: "
                "length %u > received %u\n", length, recvd);
        return -EIO;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -615,8 +578,8 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
        const struct nfs_readlinkargs *args = data;
 
        encode_fhandle(xdr, args->fh);
-       prepare_reply_buffer(req, args->pages, args->pgbase,
-                                       args->pglen, NFS_readlinkres_sz);
+       rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+                               args->pglen, NFS_readlinkres_sz);
 }
 
 /*
@@ -651,8 +614,8 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
        const struct nfs_pgio_args *args = data;
 
        encode_readargs(xdr, args);
-       prepare_reply_buffer(req, args->pages, args->pgbase,
-                                       args->count, NFS_readres_sz);
+       rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+                               args->count, NFS_readres_sz);
        req->rq_rcv_buf.flags |= XDRBUF_READ;
 }
 
@@ -809,8 +772,8 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
        const struct nfs_readdirargs *args = data;
 
        encode_readdirargs(xdr, args);
-       prepare_reply_buffer(req, args->pages, 0,
-                                       args->count, NFS_readdirres_sz);
+       rpc_prepare_reply_pages(req, args->pages, 0,
+                               args->count, NFS_readdirres_sz);
 }
 
 /*
@@ -951,12 +914,12 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
        int error;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EAGAIN;
        if (*p++ == xdr_zero) {
                p = xdr_inline_decode(xdr, 4);
-               if (unlikely(p == NULL))
-                       goto out_overflow;
+               if (unlikely(!p))
+                       return -EAGAIN;
                if (*p++ == xdr_zero)
                        return -EAGAIN;
                entry->eof = 1;
@@ -964,8 +927,8 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
        }
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EAGAIN;
        entry->ino = be32_to_cpup(p);
 
        error = decode_filename_inline(xdr, &entry->name, &entry->len);
@@ -978,17 +941,13 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
         */
        entry->prev_cookie = entry->cookie;
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EAGAIN;
        entry->cookie = be32_to_cpup(p);
 
        entry->d_type = DT_UNKNOWN;
 
        return 0;
-
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EAGAIN;
 }
 
 /*
@@ -1052,17 +1011,14 @@ static int decode_info(struct xdr_stream *xdr, struct nfs2_fsstat *result)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, NFS_info_sz << 2);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        result->tsize  = be32_to_cpup(p++);
        result->bsize  = be32_to_cpup(p++);
        result->blocks = be32_to_cpup(p++);
        result->bfree  = be32_to_cpup(p++);
        result->bavail = be32_to_cpup(p);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr,
index 9fce18548f7e848be7fa395766fb2c488dd724af..c5c3fc6e6c600b427c991d4be3a83879bbd2b48c 100644 (file)
@@ -222,8 +222,6 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
        switch (status) {
                case 0:
                        status = nfs_refresh_inode(inode, fattr);
-                       set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
-                       set_cached_acl(inode, ACL_TYPE_DEFAULT, dfacl);
                        break;
                case -EPFNOSUPPORT:
                case -EPROTONOSUPPORT:
index 78df4eb60f85b50561710e4c7285958c0674f947..110358f4986d781aa964416212e15fc112a2305e 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfsacl.h>
+#include "nfstrace.h"
 #include "internal.h"
 
 #define NFSDBG_FACILITY                NFSDBG_XDR
 #define NFS3_removeres_sz      (NFS3_setattrres_sz)
 #define NFS3_lookupres_sz      (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
 #define NFS3_accessres_sz      (1+NFS3_post_op_attr_sz+1)
-#define NFS3_readlinkres_sz    (1+NFS3_post_op_attr_sz+1)
-#define NFS3_readres_sz                (1+NFS3_post_op_attr_sz+3)
+#define NFS3_readlinkres_sz    (1+NFS3_post_op_attr_sz+1+1)
+#define NFS3_readres_sz                (1+NFS3_post_op_attr_sz+3+1)
 #define NFS3_writeres_sz       (1+NFS3_wcc_data_sz+4)
 #define NFS3_createres_sz      (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
 #define NFS3_renameres_sz      (1+(2 * NFS3_wcc_data_sz))
 #define NFS3_linkres_sz                (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
-#define NFS3_readdirres_sz     (1+NFS3_post_op_attr_sz+2)
+#define NFS3_readdirres_sz     (1+NFS3_post_op_attr_sz+2+1)
 #define NFS3_fsstatres_sz      (1+NFS3_post_op_attr_sz+13)
 #define NFS3_fsinfores_sz      (1+NFS3_post_op_attr_sz+12)
 #define NFS3_pathconfres_sz    (1+NFS3_post_op_attr_sz+6)
@@ -84,7 +85,7 @@
 #define ACL3_setaclargs_sz     (NFS3_fh_sz+1+ \
                                XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
 #define ACL3_getaclres_sz      (1+NFS3_post_op_attr_sz+1+ \
-                               XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
+                               XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
 #define ACL3_setaclres_sz      (1+NFS3_post_op_attr_sz)
 
 static int nfs3_stat_to_errno(enum nfs_stat);
@@ -103,32 +104,6 @@ static const umode_t nfs_type2fmt[] = {
        [NF3FIFO] = S_IFIFO,
 };
 
-/*
- * While encoding arguments, set up the reply buffer in advance to
- * receive reply data directly into the page cache.
- */
-static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
-                                unsigned int base, unsigned int len,
-                                unsigned int bufsize)
-{
-       struct rpc_auth *auth = req->rq_cred->cr_auth;
-       unsigned int replen;
-
-       replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
-       xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
-}
-
-/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
-       dprintk("NFS: %s prematurely hit the end of our receive buffer. "
-               "Remaining buffer length is %tu words.\n",
-               func, xdr->end - xdr->p);
-}
-
-
 /*
  * Encode/decode NFSv3 basic data types
  *
@@ -151,13 +126,10 @@ static int decode_uint32(struct xdr_stream *xdr, u32 *value)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        *value = be32_to_cpup(p);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_uint64(struct xdr_stream *xdr, u64 *value)
@@ -165,13 +137,10 @@ static int decode_uint64(struct xdr_stream *xdr, u64 *value)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 8);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        xdr_decode_hyper(p, value);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -211,14 +180,14 @@ static int decode_inline_filename3(struct xdr_stream *xdr,
        u32 count;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        count = be32_to_cpup(p);
        if (count > NFS3_MAXNAMLEN)
                goto out_nametoolong;
        p = xdr_inline_decode(xdr, count);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        *name = (const char *)p;
        *length = count;
        return 0;
@@ -226,9 +195,6 @@ static int decode_inline_filename3(struct xdr_stream *xdr,
 out_nametoolong:
        dprintk("NFS: returned filename too long: %u\n", count);
        return -ENAMETOOLONG;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -249,8 +215,8 @@ static int decode_nfspath3(struct xdr_stream *xdr)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        count = be32_to_cpup(p);
        if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN))
                goto out_nametoolong;
@@ -267,9 +233,6 @@ out_cheating:
        dprintk("NFS: server cheating in pathname result: "
                "count %u > recvd %u\n", count, recvd);
        return -EIO;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -303,13 +266,10 @@ static int decode_cookieverf3(struct xdr_stream *xdr, __be32 *verifier)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        memcpy(verifier, p, NFS3_COOKIEVERFSIZE);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -330,13 +290,10 @@ static int decode_writeverf3(struct xdr_stream *xdr, struct nfs_write_verifier *
        __be32 *p;
 
        p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        memcpy(verifier->data, p, NFS3_WRITEVERFSIZE);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -364,13 +321,16 @@ static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
+       if (unlikely(*p != cpu_to_be32(NFS3_OK)))
+               goto out_status;
+       *status = 0;
+       return 0;
+out_status:
        *status = be32_to_cpup(p);
+       trace_nfs_xdr_status((int)*status);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -453,23 +413,20 @@ static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        length = be32_to_cpup(p++);
        if (unlikely(length > NFS3_FHSIZE))
                goto out_toobig;
        p = xdr_inline_decode(xdr, length);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        fh->size = length;
        memcpy(fh->data, p, length);
        return 0;
 out_toobig:
        dprintk("NFS: file handle size (%u) too big\n", length);
        return -E2BIG;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static void zero_nfs_fh3(struct nfs_fh *fh)
@@ -655,8 +612,8 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, NFS3_fattr_sz << 2);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
 
        p = xdr_decode_ftype3(p, &fmode);
 
@@ -690,9 +647,6 @@ out_uid:
 out_gid:
        dprintk("NFS: returned invalid gid\n");
        return -EINVAL;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -710,14 +664,11 @@ static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        if (*p != xdr_zero)
                return decode_fattr3(xdr, fattr);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -733,8 +684,8 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, NFS3_wcc_attr_sz << 2);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
 
        fattr->valid |= NFS_ATTR_FATTR_PRESIZE
                | NFS_ATTR_FATTR_PRECHANGE
@@ -747,9 +698,6 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
        fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime);
 
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -773,14 +721,11 @@ static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        if (*p != xdr_zero)
                return decode_wcc_attr(xdr, fattr);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
@@ -808,15 +753,12 @@ out:
 static int decode_post_op_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
 {
        __be32 *p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        if (*p != xdr_zero)
                return decode_nfs_fh3(xdr, fh);
        zero_nfs_fh3(fh);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -953,8 +895,8 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
        const struct nfs3_readlinkargs *args = data;
 
        encode_nfs_fh3(xdr, args->fh);
-       prepare_reply_buffer(req, args->pages, args->pgbase,
-                                       args->pglen, NFS3_readlinkres_sz);
+       rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+                               args->pglen, NFS3_readlinkres_sz);
 }
 
 /*
@@ -986,8 +928,8 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
        unsigned int replen = args->replen ? args->replen : NFS3_readres_sz;
 
        encode_read3args(xdr, args);
-       prepare_reply_buffer(req, args->pages, args->pgbase,
-                                       args->count, replen);
+       rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+                               args->count, replen);
        req->rq_rcv_buf.flags |= XDRBUF_READ;
 }
 
@@ -1279,7 +1221,7 @@ static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
        const struct nfs3_readdirargs *args = data;
 
        encode_readdir3args(xdr, args);
-       prepare_reply_buffer(req, args->pages, 0,
+       rpc_prepare_reply_pages(req, args->pages, 0,
                                args->count, NFS3_readdirres_sz);
 }
 
@@ -1321,7 +1263,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
        const struct nfs3_readdirargs *args = data;
 
        encode_readdirplus3args(xdr, args);
-       prepare_reply_buffer(req, args->pages, 0,
+       rpc_prepare_reply_pages(req, args->pages, 0,
                                args->count, NFS3_readdirres_sz);
 }
 
@@ -1366,7 +1308,7 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
        encode_nfs_fh3(xdr, args->fh);
        encode_uint32(xdr, args->mask);
        if (args->mask & (NFS_ACL | NFS_DFACL)) {
-               prepare_reply_buffer(req, args->pages, 0,
+               rpc_prepare_reply_pages(req, args->pages, 0,
                                        NFSACL_MAXPAGES << PAGE_SHIFT,
                                        ACL3_getaclres_sz);
                req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
@@ -1643,8 +1585,8 @@ static int decode_read3resok(struct xdr_stream *xdr,
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4 + 4 + 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        count = be32_to_cpup(p++);
        eof = be32_to_cpup(p++);
        ocount = be32_to_cpup(p++);
@@ -1667,9 +1609,6 @@ out_cheating:
        count = recvd;
        eof = 0;
        goto out;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -1690,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
        result->op_status = status;
        if (status != NFS3_OK)
                goto out_status;
-       result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
+       result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
        error = decode_read3resok(xdr, result);
 out:
        return error;
@@ -1731,22 +1670,18 @@ static int decode_write3resok(struct xdr_stream *xdr,
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4 + 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        result->count = be32_to_cpup(p++);
        result->verf->committed = be32_to_cpup(p++);
        if (unlikely(result->verf->committed > NFS_FILE_SYNC))
                goto out_badvalue;
        if (decode_writeverf3(xdr, &result->verf->verifier))
-               goto out_eio;
+               return -EIO;
        return result->count;
 out_badvalue:
        dprintk("NFS: bad stable_how value: %u\n", result->verf->committed);
        return -EIO;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-out_eio:
-       return -EIO;
 }
 
 static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -2010,12 +1945,12 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
        u64 new_cookie;
 
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EAGAIN;
        if (*p == xdr_zero) {
                p = xdr_inline_decode(xdr, 4);
-               if (unlikely(p == NULL))
-                       goto out_overflow;
+               if (unlikely(!p))
+                       return -EAGAIN;
                if (*p == xdr_zero)
                        return -EAGAIN;
                entry->eof = 1;
@@ -2051,8 +1986,8 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 
                /* In fact, a post_op_fh3: */
                p = xdr_inline_decode(xdr, 4);
-               if (unlikely(p == NULL))
-                       goto out_overflow;
+               if (unlikely(!p))
+                       return -EAGAIN;
                if (*p != xdr_zero) {
                        error = decode_nfs_fh3(xdr, entry->fh);
                        if (unlikely(error)) {
@@ -2069,9 +2004,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 
        return 0;
 
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EAGAIN;
 out_truncated:
        dprintk("NFS: directory entry contains invalid file handle\n");
        *entry = old;
@@ -2183,8 +2115,8 @@ static int decode_fsstat3resok(struct xdr_stream *xdr,
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 8 * 6 + 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        p = xdr_decode_size3(p, &result->tbytes);
        p = xdr_decode_size3(p, &result->fbytes);
        p = xdr_decode_size3(p, &result->abytes);
@@ -2193,9 +2125,6 @@ static int decode_fsstat3resok(struct xdr_stream *xdr,
        xdr_decode_size3(p, &result->afiles);
        /* ignore invarsec */
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
@@ -2255,8 +2184,8 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4 * 7 + 8 + 8 + 4);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        result->rtmax  = be32_to_cpup(p++);
        result->rtpref = be32_to_cpup(p++);
        result->rtmult = be32_to_cpup(p++);
@@ -2270,9 +2199,6 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
        /* ignore properties */
        result->lease_time = 0;
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
@@ -2328,15 +2254,12 @@ static int decode_pathconf3resok(struct xdr_stream *xdr,
        __be32 *p;
 
        p = xdr_inline_decode(xdr, 4 * 6);
-       if (unlikely(p == NULL))
-               goto out_overflow;
+       if (unlikely(!p))
+               return -EIO;
        result->max_link = be32_to_cpup(p++);
        result->max_namelen = be32_to_cpup(p);
        /* ignore remaining fields */
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
index 19ec38f85ce0724fb11e8d879c9688a12b3929d4..901cca7542f944ce8829131d4262aee5529c0df0 100644 (file)
@@ -20,5 +20,8 @@ loff_t nfs42_proc_llseek(struct file *, loff_t, int);
 int nfs42_proc_layoutstats_generic(struct nfs_server *,
                                   struct nfs42_layoutstat_data *);
 int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t);
+int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
+                          const struct nfs42_layout_error *errors,
+                          size_t n);
 
 #endif /* __LINUX_FS_NFS_NFS4_2_H */
index fed06fd9998d322a202befd46f8c2546dedbba84..ff6f85fb676b7c1094878b269d35f2f127ca5fb5 100644 (file)
@@ -672,6 +672,170 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
        return 0;
 }
 
+static struct nfs42_layouterror_data *
+nfs42_alloc_layouterror_data(struct pnfs_layout_segment *lseg, gfp_t gfp_flags)
+{
+       struct nfs42_layouterror_data *data;
+       struct inode *inode = lseg->pls_layout->plh_inode;
+
+       data = kzalloc(sizeof(*data), gfp_flags);
+       if (data) {
+               data->args.inode = data->inode = nfs_igrab_and_active(inode);
+               if (data->inode) {
+                       data->lseg = pnfs_get_lseg(lseg);
+                       if (data->lseg)
+                               return data;
+                       nfs_iput_and_deactive(data->inode);
+               }
+               kfree(data);
+       }
+       return NULL;
+}
+
+static void
+nfs42_free_layouterror_data(struct nfs42_layouterror_data *data)
+{
+       pnfs_put_lseg(data->lseg);
+       nfs_iput_and_deactive(data->inode);
+       kfree(data);
+}
+
+static void
+nfs42_layouterror_prepare(struct rpc_task *task, void *calldata)
+{
+       struct nfs42_layouterror_data *data = calldata;
+       struct inode *inode = data->inode;
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct pnfs_layout_hdr *lo = data->lseg->pls_layout;
+       unsigned i;
+
+       spin_lock(&inode->i_lock);
+       if (!pnfs_layout_is_valid(lo)) {
+               spin_unlock(&inode->i_lock);
+               rpc_exit(task, 0);
+               return;
+       }
+       for (i = 0; i < data->args.num_errors; i++)
+               nfs4_stateid_copy(&data->args.errors[i].stateid,
+                               &lo->plh_stateid);
+       spin_unlock(&inode->i_lock);
+       nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
+                           &data->res.seq_res, task);
+}
+
+static void
+nfs42_layouterror_done(struct rpc_task *task, void *calldata)
+{
+       struct nfs42_layouterror_data *data = calldata;
+       struct inode *inode = data->inode;
+       struct pnfs_layout_hdr *lo = data->lseg->pls_layout;
+
+       if (!nfs4_sequence_done(task, &data->res.seq_res))
+               return;
+
+       switch (task->tk_status) {
+       case 0:
+               break;
+       case -NFS4ERR_BADHANDLE:
+       case -ESTALE:
+               pnfs_destroy_layout(NFS_I(inode));
+               break;
+       case -NFS4ERR_EXPIRED:
+       case -NFS4ERR_ADMIN_REVOKED:
+       case -NFS4ERR_DELEG_REVOKED:
+       case -NFS4ERR_STALE_STATEID:
+       case -NFS4ERR_BAD_STATEID:
+               spin_lock(&inode->i_lock);
+               if (pnfs_layout_is_valid(lo) &&
+                   nfs4_stateid_match(&data->args.errors[0].stateid,
+                                            &lo->plh_stateid)) {
+                       LIST_HEAD(head);
+
+                       /*
+                        * Mark the bad layout state as invalid, then retry
+                        * with the current stateid.
+                        */
+                       pnfs_mark_layout_stateid_invalid(lo, &head);
+                       spin_unlock(&inode->i_lock);
+                       pnfs_free_lseg_list(&head);
+                       nfs_commit_inode(inode, 0);
+               } else
+                       spin_unlock(&inode->i_lock);
+               break;
+       case -NFS4ERR_OLD_STATEID:
+               spin_lock(&inode->i_lock);
+               if (pnfs_layout_is_valid(lo) &&
+                   nfs4_stateid_match_other(&data->args.errors[0].stateid,
+                                       &lo->plh_stateid)) {
+                       /* Do we need to delay before resending? */
+                       if (!nfs4_stateid_is_newer(&lo->plh_stateid,
+                                               &data->args.errors[0].stateid))
+                               rpc_delay(task, HZ);
+                       rpc_restart_call_prepare(task);
+               }
+               spin_unlock(&inode->i_lock);
+               break;
+       case -ENOTSUPP:
+       case -EOPNOTSUPP:
+               NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTERROR;
+       }
+}
+
+static void
+nfs42_layouterror_release(void *calldata)
+{
+       struct nfs42_layouterror_data *data = calldata;
+
+       nfs42_free_layouterror_data(data);
+}
+
+static const struct rpc_call_ops nfs42_layouterror_ops = {
+       .rpc_call_prepare = nfs42_layouterror_prepare,
+       .rpc_call_done = nfs42_layouterror_done,
+       .rpc_release = nfs42_layouterror_release,
+};
+
+int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
+               const struct nfs42_layout_error *errors, size_t n)
+{
+       struct inode *inode = lseg->pls_layout->plh_inode;
+       struct nfs42_layouterror_data *data;
+       struct rpc_task *task;
+       struct rpc_message msg = {
+               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTERROR],
+       };
+       struct rpc_task_setup task_setup = {
+               .rpc_message = &msg,
+               .callback_ops = &nfs42_layouterror_ops,
+               .flags = RPC_TASK_ASYNC,
+       };
+       unsigned int i;
+
+       if (!nfs_server_capable(inode, NFS_CAP_LAYOUTERROR))
+               return -EOPNOTSUPP;
+       if (n > NFS42_LAYOUTERROR_MAX)
+               return -EINVAL;
+       data = nfs42_alloc_layouterror_data(lseg, GFP_NOFS);
+       if (!data)
+               return -ENOMEM;
+       for (i = 0; i < n; i++) {
+               data->args.errors[i] = errors[i];
+               data->args.num_errors++;
+               data->res.num_errors++;
+       }
+       msg.rpc_argp = &data->args;
+       msg.rpc_resp = &data->res;
+       task_setup.callback_data = data;
+       task_setup.rpc_client = NFS_SERVER(inode)->client;
+       nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0);
+       task = rpc_run_task(&task_setup);
+       if (IS_ERR(task))
+               return PTR_ERR(task);
+       rpc_put_task(task);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nfs42_proc_layouterror);
+
 static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
                struct file *dst_f, struct nfs_lock_context *src_lock,
                struct nfs_lock_context *dst_lock, loff_t src_offset,
index 69f72ed2bf879310f0ece97fab1e4cccea75a402..aed865a8462963276e904d5782ab884743ff6a47 100644 (file)
                                        1 /* opaque devaddr4 length */ + \
                                        XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE))
 #define decode_layoutstats_maxsz       (op_decode_hdr_maxsz)
+#define encode_device_error_maxsz      (XDR_QUADLEN(NFS4_DEVICEID4_SIZE) + \
+                                       1 /* status */ + 1 /* opnum */)
+#define encode_layouterror_maxsz       (op_decode_hdr_maxsz + \
+                                       2 /* offset */ + \
+                                       2 /* length */ + \
+                                       encode_stateid_maxsz + \
+                                       1 /* Array size */ + \
+                                       encode_device_error_maxsz)
+#define decode_layouterror_maxsz       (op_decode_hdr_maxsz)
 #define encode_clone_maxsz             (encode_stateid_maxsz + \
                                        encode_stateid_maxsz + \
                                        2 /* src offset */ + \
 #define decode_clone_maxsz             (op_decode_hdr_maxsz)
 
 #define NFS4_enc_allocate_sz           (compound_encode_hdr_maxsz + \
+                                        encode_sequence_maxsz + \
                                         encode_putfh_maxsz + \
                                         encode_allocate_maxsz + \
                                         encode_getattr_maxsz)
 #define NFS4_dec_allocate_sz           (compound_decode_hdr_maxsz + \
+                                        decode_sequence_maxsz + \
                                         decode_putfh_maxsz + \
                                         decode_allocate_maxsz + \
                                         decode_getattr_maxsz)
 #define NFS4_enc_copy_sz               (compound_encode_hdr_maxsz + \
+                                        encode_sequence_maxsz + \
                                         encode_putfh_maxsz + \
                                         encode_savefh_maxsz + \
                                         encode_putfh_maxsz + \
                                         encode_copy_maxsz + \
                                         encode_commit_maxsz)
 #define NFS4_dec_copy_sz               (compound_decode_hdr_maxsz + \
+                                        decode_sequence_maxsz + \
                                         decode_putfh_maxsz + \
                                         decode_savefh_maxsz + \
                                         decode_putfh_maxsz + \
                                         decode_copy_maxsz + \
                                         decode_commit_maxsz)
 #define NFS4_enc_offload_cancel_sz     (compound_encode_hdr_maxsz + \
+                                        encode_sequence_maxsz + \
                                         encode_putfh_maxsz + \
                                         encode_offload_cancel_maxsz)
 #define NFS4_dec_offload_cancel_sz     (compound_decode_hdr_maxsz + \
+                                        decode_sequence_maxsz + \
                                         decode_putfh_maxsz + \
                                         decode_offload_cancel_maxsz)
 #define NFS4_enc_deallocate_sz         (compound_encode_hdr_maxsz + \
+                                        encode_sequence_maxsz + \
                                         encode_putfh_maxsz + \
                                         encode_deallocate_maxsz + \
                                         encode_getattr_maxsz)
 #define NFS4_dec_deallocate_sz         (compound_decode_hdr_maxsz + \
+                                        decode_sequence_maxsz + \
                                         decode_putfh_maxsz + \
                                         decode_deallocate_maxsz + \
                                         decode_getattr_maxsz)
 #define NFS4_enc_seek_sz               (compound_encode_hdr_maxsz + \
+                                        encode_sequence_maxsz + \
                                         encode_putfh_maxsz + \
                                         encode_seek_maxsz)
 #define NFS4_dec_seek_sz               (compound_decode_hdr_maxsz + \
+                                        decode_sequence_maxsz + \
                                         decode_putfh_maxsz + \
                                         decode_seek_maxsz)
 #define NFS4_enc_layoutstats_sz                (compound_encode_hdr_maxsz + \
                                         decode_sequence_maxsz + \
                                         decode_putfh_maxsz + \
                                         PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz)
+#define NFS4_enc_layouterror_sz                (compound_encode_hdr_maxsz + \
+                                        encode_sequence_maxsz + \
+                                        encode_putfh_maxsz + \
+                                        NFS42_LAYOUTERROR_MAX * \
+                                        encode_layouterror_maxsz)
+#define NFS4_dec_layouterror_sz                (compound_decode_hdr_maxsz + \
+                                        decode_sequence_maxsz + \
+                                        decode_putfh_maxsz + \
+                                        NFS42_LAYOUTERROR_MAX * \
+                                        decode_layouterror_maxsz)
 #define NFS4_enc_clone_sz              (compound_encode_hdr_maxsz + \
                                         encode_sequence_maxsz + \
                                         encode_putfh_maxsz + \
@@ -223,6 +252,34 @@ static void encode_clone(struct xdr_stream *xdr,
        xdr_encode_hyper(p, args->count);
 }
 
+static void encode_device_error(struct xdr_stream *xdr,
+                               const struct nfs42_device_error *error)
+{
+       __be32 *p;
+
+       p = reserve_space(xdr, NFS4_DEVICEID4_SIZE + 2*4);
+       p = xdr_encode_opaque_fixed(p, error->dev_id.data,
+                       NFS4_DEVICEID4_SIZE);
+       *p++ = cpu_to_be32(error->status);
+       *p = cpu_to_be32(error->opnum);
+}
+
+static void encode_layouterror(struct xdr_stream *xdr,
+                              const struct nfs42_layout_error *args,
+                              struct compound_hdr *hdr)
+{
+       __be32 *p;
+
+       encode_op_hdr(xdr, OP_LAYOUTERROR, decode_layouterror_maxsz, hdr);
+       p = reserve_space(xdr, 8 + 8);
+       p = xdr_encode_hyper(p, args->offset);
+       p = xdr_encode_hyper(p, args->length);
+       encode_nfs4_stateid(xdr, &args->stateid);
+       p = reserve_space(xdr, 4);
+       *p = cpu_to_be32(1);
+       encode_device_error(xdr, &args->errors[0]);
+}
+
 /*
  * Encode ALLOCATE request
  */
@@ -381,6 +438,27 @@ static void nfs4_xdr_enc_clone(struct rpc_rqst *req,
        encode_nops(&hdr);
 }
 
+/*
+ * Encode LAYOUTERROR request
+ */
+static void nfs4_xdr_enc_layouterror(struct rpc_rqst *req,
+                                    struct xdr_stream *xdr,
+                                    const void *data)
+{
+       const struct nfs42_layouterror_args *args = data;
+       struct compound_hdr hdr = {
+               .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+       };
+       int i;
+
+       encode_compound_hdr(xdr, req, &hdr);
+       encode_sequence(xdr, &args->seq_args, &hdr);
+       encode_putfh(xdr, NFS_FH(args->inode), &hdr);
+       for (i = 0; i < args->num_errors; i++)
+               encode_layouterror(xdr, &args->errors[i], &hdr);
+       encode_nops(&hdr);
+}
+
 static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
 {
        return decode_op_hdr(xdr, OP_ALLOCATE);
@@ -394,7 +472,7 @@ static int decode_write_response(struct xdr_stream *xdr,
 
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        count = be32_to_cpup(p);
        if (count > 1)
                return -EREMOTEIO;
@@ -402,18 +480,14 @@ static int decode_write_response(struct xdr_stream *xdr,
                status = decode_opaque_fixed(xdr, &res->stateid,
                                NFS4_STATEID_SIZE);
                if (unlikely(status))
-                       goto out_overflow;
+                       return -EIO;
        }
        p = xdr_inline_decode(xdr, 8 + 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        p = xdr_decode_hyper(p, &res->count);
        res->verifier.committed = be32_to_cpup(p);
        return decode_verifier(xdr, &res->verifier.verifier);
-
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_copy_requirements(struct xdr_stream *xdr,
@@ -422,14 +496,11 @@ static int decode_copy_requirements(struct xdr_stream *xdr,
 
        p = xdr_inline_decode(xdr, 4 + 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
 
        res->consecutive = be32_to_cpup(p++);
        res->synchronous = be32_to_cpup(p++);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_copy(struct xdr_stream *xdr, struct nfs42_copy_res *res)
@@ -474,15 +545,11 @@ static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
 
        p = xdr_inline_decode(xdr, 4 + 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
 
        res->sr_eof = be32_to_cpup(p++);
        p = xdr_decode_hyper(p, &res->sr_offset);
        return 0;
-
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_layoutstats(struct xdr_stream *xdr)
@@ -495,6 +562,11 @@ static int decode_clone(struct xdr_stream *xdr)
        return decode_op_hdr(xdr, OP_CLONE);
 }
 
+static int decode_layouterror(struct xdr_stream *xdr)
+{
+       return decode_op_hdr(xdr, OP_LAYOUTERROR);
+}
+
 /*
  * Decode ALLOCATE request
  */
@@ -704,4 +776,30 @@ out:
        return status;
 }
 
+/*
+ * Decode LAYOUTERROR response
+ */
+static int nfs4_xdr_dec_layouterror(struct rpc_rqst *rqstp,
+                                   struct xdr_stream *xdr,
+                                   void *data)
+{
+       struct nfs42_layouterror_res *res = data;
+       struct compound_hdr hdr;
+       int status, i;
+
+       status = decode_compound_hdr(xdr, &hdr);
+       if (status)
+               goto out;
+       status = decode_sequence(xdr, &res->seq_res, rqstp);
+       if (status)
+               goto out;
+       status = decode_putfh(xdr);
+
+       for (i = 0; i < res->num_errors && status == 0; i++)
+               status = decode_layouterror(xdr);
+out:
+       res->rpc_status = status;
+       return status;
+}
+
 #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
index 2548405da1f79165bf40827c62592fcf80ee763e..1339ede979afd5e3dfcb0d3a371dea41d01696b4 100644 (file)
@@ -42,7 +42,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
 }
 
 #ifdef CONFIG_NFS_V4_1
-/**
+/*
  * Per auth flavor data server rpc clients
  */
 struct nfs4_ds_server {
@@ -51,7 +51,9 @@ struct nfs4_ds_server {
 };
 
 /**
- * Common lookup case for DS I/O
+ * nfs4_find_ds_client - Common lookup case for DS I/O
+ * @ds_clp: pointer to the DS's nfs_client
+ * @flavor: rpc auth flavour to match
  */
 static struct nfs4_ds_server *
 nfs4_find_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor)
@@ -118,9 +120,13 @@ nfs4_free_ds_server(struct nfs4_ds_server *dss)
 }
 
 /**
-* Find or create a DS rpc client with th MDS server rpc client auth flavor
-* in the nfs_client cl_ds_clients list.
-*/
+ * nfs4_find_or_create_ds_client - Find or create a DS rpc client
+ * @ds_clp: pointer to the DS's nfs_client
+ * @inode: pointer to the inode
+ *
+ * Find or create a DS rpc client with the MDS server rpc client auth flavor
+ * in the nfs_client cl_ds_clients list.
+ */
 struct rpc_clnt *
 nfs4_find_or_create_ds_client(struct nfs_client *ds_clp, struct inode *inode)
 {
@@ -145,7 +151,6 @@ static void
 nfs4_shutdown_ds_clients(struct nfs_client *clp)
 {
        struct nfs4_ds_server *dss;
-       LIST_HEAD(shutdown_list);
 
        while (!list_empty(&clp->cl_ds_clients)) {
                dss = list_entry(clp->cl_ds_clients.next,
@@ -284,7 +289,7 @@ static int nfs4_init_callback(struct nfs_client *clp)
 
 /**
  * nfs40_init_client - nfs_client initialization tasks for NFSv4.0
- * @clp - nfs_client to initialize
+ * @clp: nfs_client to initialize
  *
  * Returns zero on success, or a negative errno if some error occurred.
  */
@@ -312,7 +317,7 @@ int nfs40_init_client(struct nfs_client *clp)
 
 /**
  * nfs41_init_client - nfs_client initialization tasks for NFSv4.1+
- * @clp - nfs_client to initialize
+ * @clp: nfs_client to initialize
  *
  * Returns zero on success, or a negative errno if some error occurred.
  */
@@ -360,9 +365,7 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
  * nfs4_init_client - Initialise an NFS4 client record
  *
  * @clp: nfs_client to initialise
- * @timeparms: timeout parameters for underlying RPC transport
- * @ip_addr: callback IP address in presentation format
- * @authflavor: authentication flavor for underlying RPC transport
+ * @cl_init: pointer to nfs_client_initdata
  *
  * Returns pointer to an NFS client, or an ERR_PTR value.
  */
@@ -649,13 +652,13 @@ nfs4_check_server_scope(struct nfs41_server_scope *s1,
 
 /**
  * nfs4_detect_session_trunking - Checks for session trunking.
- *
- * Called after a successful EXCHANGE_ID on a multi-addr connection.
- * Upon success, add the transport.
- *
  * @clp:    original mount nfs_client
  * @res:    result structure from an exchange_id using the original mount
  *          nfs_client with a new multi_addr transport
+ * @xprt:   pointer to the transport to add.
+ *
+ * Called after a successful EXCHANGE_ID on a multi-addr connection.
+ * Upon success, add the transport.
  *
  * Returns zero on success, otherwise -EINVAL
  *
index 24f06dcc2b08eeabd9f6b14591584cf8676749c8..2e460c33ae4873389cd5ce6e78dd8162bd055bc8 100644 (file)
@@ -137,6 +137,7 @@ static size_t nfs_parse_server_name(char *string, size_t len,
 
 /**
  * nfs_find_best_sec - Find a security mechanism supported locally
+ * @clnt: pointer to rpc_clnt
  * @server: NFS server struct
  * @flavors: List of security tuples returned by SECINFO procedure
  *
@@ -288,8 +289,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
 
 /**
  * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
- * @dentry - parent directory
- * @locations - array of NFSv4 server location information
+ * @dentry: parent directory
+ * @locations: array of NFSv4 server location information
  *
  */
 static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
index 557a5d63618394c2af4fdf7a7fb5b928ccbc2e47..4dbb0ee234324db3275de7c7a26fc3bcd040171a 100644 (file)
@@ -730,33 +730,41 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
        res->sr_slot = NULL;
 }
 
+static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot,
+               u32 seqnr)
+{
+       if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0)
+               slot->seq_nr_highest_sent = seqnr;
+}
+static void nfs4_slot_sequence_acked(struct nfs4_slot *slot,
+               u32 seqnr)
+{
+       slot->seq_nr_highest_sent = seqnr;
+       slot->seq_nr_last_acked = seqnr;
+}
+
 static int nfs41_sequence_process(struct rpc_task *task,
                struct nfs4_sequence_res *res)
 {
        struct nfs4_session *session;
        struct nfs4_slot *slot = res->sr_slot;
        struct nfs_client *clp;
-       bool interrupted = false;
        int ret = 1;
 
        if (slot == NULL)
                goto out_noaction;
        /* don't increment the sequence number if the task wasn't sent */
-       if (!RPC_WAS_SENT(task))
+       if (!RPC_WAS_SENT(task) || slot->seq_done)
                goto out;
 
        session = slot->table->session;
 
-       if (slot->interrupted) {
-               if (res->sr_status != -NFS4ERR_DELAY)
-                       slot->interrupted = 0;
-               interrupted = true;
-       }
-
        trace_nfs4_sequence_done(session, res);
        /* Check the SEQUENCE operation status */
        switch (res->sr_status) {
        case 0:
+               /* Mark this sequence number as having been acked */
+               nfs4_slot_sequence_acked(slot, slot->seq_nr);
                /* Update the slot's sequence and clientid lease timer */
                slot->seq_done = 1;
                clp = session->clp;
@@ -771,9 +779,9 @@ static int nfs41_sequence_process(struct rpc_task *task,
                 * sr_status remains 1 if an RPC level error occurred.
                 * The server may or may not have processed the sequence
                 * operation..
-                * Mark the slot as having hosted an interrupted RPC call.
                 */
-               slot->interrupted = 1;
+               nfs4_slot_sequence_record_sent(slot, slot->seq_nr);
+               slot->seq_done = 1;
                goto out;
        case -NFS4ERR_DELAY:
                /* The server detected a resend of the RPC call and
@@ -784,6 +792,7 @@ static int nfs41_sequence_process(struct rpc_task *task,
                        __func__,
                        slot->slot_nr,
                        slot->seq_nr);
+               nfs4_slot_sequence_acked(slot, slot->seq_nr);
                goto out_retry;
        case -NFS4ERR_RETRY_UNCACHED_REP:
        case -NFS4ERR_SEQ_FALSE_RETRY:
@@ -791,6 +800,7 @@ static int nfs41_sequence_process(struct rpc_task *task,
                 * The server thinks we tried to replay a request.
                 * Retry the call after bumping the sequence ID.
                 */
+               nfs4_slot_sequence_acked(slot, slot->seq_nr);
                goto retry_new_seq;
        case -NFS4ERR_BADSLOT:
                /*
@@ -801,21 +811,28 @@ static int nfs41_sequence_process(struct rpc_task *task,
                        goto session_recover;
                goto retry_nowait;
        case -NFS4ERR_SEQ_MISORDERED:
+               nfs4_slot_sequence_record_sent(slot, slot->seq_nr);
                /*
-                * Was the last operation on this sequence interrupted?
-                * If so, retry after bumping the sequence number.
-                */
-               if (interrupted)
-                       goto retry_new_seq;
-               /*
-                * Could this slot have been previously retired?
-                * If so, then the server may be expecting seq_nr = 1!
+                * Were one or more calls using this slot interrupted?
+                * If the server never received the request, then our
+                * transmitted slot sequence number may be too high.
                 */
-               if (slot->seq_nr != 1) {
-                       slot->seq_nr = 1;
+               if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) {
+                       slot->seq_nr--;
                        goto retry_nowait;
                }
-               goto session_recover;
+               /*
+                * RFC5661:
+                * A retry might be sent while the original request is
+                * still in progress on the replier. The replier SHOULD
+                * deal with the issue by returning NFS4ERR_DELAY as the
+                * reply to SEQUENCE or CB_SEQUENCE operation, but
+                * implementations MAY return NFS4ERR_SEQ_MISORDERED.
+                *
+                * Restart the search after a delay.
+                */
+               slot->seq_nr = slot->seq_nr_highest_sent;
+               goto out_retry;
        default:
                /* Just update the slot sequence no. */
                slot->seq_done = 1;
@@ -906,17 +923,6 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
        .rpc_call_done = nfs41_call_sync_done,
 };
 
-static void
-nfs4_sequence_process_interrupted(struct nfs_client *client,
-               struct nfs4_slot *slot, const struct cred *cred)
-{
-       struct rpc_task *task;
-
-       task = _nfs41_proc_sequence(client, cred, slot, true);
-       if (!IS_ERR(task))
-               rpc_put_task_async(task);
-}
-
 #else  /* !CONFIG_NFS_V4_1 */
 
 static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
@@ -937,16 +943,15 @@ int nfs4_sequence_done(struct rpc_task *task,
 }
 EXPORT_SYMBOL_GPL(nfs4_sequence_done);
 
-static void
-nfs4_sequence_process_interrupted(struct nfs_client *client,
-               struct nfs4_slot *slot, const struct cred *cred)
+#endif /* !CONFIG_NFS_V4_1 */
+
+static void nfs41_sequence_res_init(struct nfs4_sequence_res *res)
 {
-       WARN_ON_ONCE(1);
-       slot->interrupted = 0;
+       res->sr_timestamp = jiffies;
+       res->sr_status_flags = 0;
+       res->sr_status = 1;
 }
 
-#endif /* !CONFIG_NFS_V4_1 */
-
 static
 void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
                struct nfs4_sequence_res *res,
@@ -958,10 +963,6 @@ void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args,
        args->sa_slot = slot;
 
        res->sr_slot = slot;
-       res->sr_timestamp = jiffies;
-       res->sr_status_flags = 0;
-       res->sr_status = 1;
-
 }
 
 int nfs4_setup_sequence(struct nfs_client *client,
@@ -982,31 +983,25 @@ int nfs4_setup_sequence(struct nfs_client *client,
                task->tk_timeout = 0;
        }
 
-       for (;;) {
-               spin_lock(&tbl->slot_tbl_lock);
-               /* The state manager will wait until the slot table is empty */
-               if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
-                       goto out_sleep;
-
-               slot = nfs4_alloc_slot(tbl);
-               if (IS_ERR(slot)) {
-                       /* Try again in 1/4 second */
-                       if (slot == ERR_PTR(-ENOMEM))
-                               task->tk_timeout = HZ >> 2;
-                       goto out_sleep;
-               }
-               spin_unlock(&tbl->slot_tbl_lock);
+       spin_lock(&tbl->slot_tbl_lock);
+       /* The state manager will wait until the slot table is empty */
+       if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
+               goto out_sleep;
 
-               if (likely(!slot->interrupted))
-                       break;
-               nfs4_sequence_process_interrupted(client,
-                               slot, task->tk_msg.rpc_cred);
+       slot = nfs4_alloc_slot(tbl);
+       if (IS_ERR(slot)) {
+               /* Try again in 1/4 second */
+               if (slot == ERR_PTR(-ENOMEM))
+                       task->tk_timeout = HZ >> 2;
+               goto out_sleep;
        }
+       spin_unlock(&tbl->slot_tbl_lock);
 
        nfs4_sequence_attach_slot(args, res, slot);
 
        trace_nfs4_setup_sequence(session, args);
 out_start:
+       nfs41_sequence_res_init(res);
        rpc_call_start(task);
        return 0;
 
@@ -1555,6 +1550,10 @@ static void nfs_clear_open_stateid(struct nfs4_state *state,
 
 static void nfs_set_open_stateid_locked(struct nfs4_state *state,
                const nfs4_stateid *stateid, nfs4_stateid *freeme)
+       __must_hold(&state->owner->so_lock)
+       __must_hold(&state->seqlock)
+       __must_hold(RCU)
+
 {
        DEFINE_WAIT(wait);
        int status = 0;
@@ -5963,7 +5962,7 @@ out:
 /**
  * nfs4_proc_setclientid_confirm - Confirm client ID
  * @clp: state data structure
- * @res: result of a previous SETCLIENTID
+ * @arg: result of a previous SETCLIENTID
  * @cred: credential to use for this call
  *
  * Returns zero, a negative errno, or a negative NFS4ERR status code.
@@ -7527,7 +7526,7 @@ int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred)
        return status;
 }
 
-/**
+/*
  * If 'use_integrity' is true and the state management nfs_client
  * cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient
  * and the machine credential as per RFC3530bis and RFC5661 Security
@@ -8937,10 +8936,12 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
        if (status != 0)
                goto out;
 
-       /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
-       if (task->tk_status < 0 || lgp->res.layoutp->len == 0) {
+       if (task->tk_status < 0) {
                status = nfs4_layoutget_handle_exception(task, lgp, &exception);
                *timeout = exception.timeout;
+       } else if (lgp->res.layoutp->len == 0) {
+               status = -EAGAIN;
+               *timeout = nfs4_update_delay(&exception.timeout);
        } else
                lseg = pnfs_layout_process(lgp);
 out:
@@ -9219,7 +9220,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
        return status;
 }
 
-/**
+/*
  * Use the state management nfs_client cl_rpcclient, which uses krb5i (if
  * possible) as per RFC3530bis and RFC5661 Security Considerations sections
  */
@@ -9484,7 +9485,7 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = {
  * @server: server / transport on which to perform the operation
  * @stateid: state ID to release
  * @cred: credential
- * @is_recovery: set to true if this call needs to be privileged
+ * @privileged: set to true if this call needs to be privileged
  *
  * Note: this function is always asynchronous.
  */
@@ -9691,7 +9692,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
                | NFS_CAP_DEALLOCATE
                | NFS_CAP_SEEK
                | NFS_CAP_LAYOUTSTATS
-               | NFS_CAP_CLONE,
+               | NFS_CAP_CLONE
+               | NFS_CAP_LAYOUTERROR,
        .init_client = nfs41_init_client,
        .shutdown_client = nfs41_shutdown_client,
        .match_stateid = nfs41_match_stateid,
index a5489d70a7244642967422aea12008593536c304..bcb532def9e2bead7d82de63ea64915d9ab0fff4 100644 (file)
@@ -55,7 +55,7 @@ static void nfs4_shrink_slot_table(struct nfs4_slot_table  *tbl, u32 newsize)
 
 /**
  * nfs4_slot_tbl_drain_complete - wake waiters when drain is complete
- * @tbl - controlling slot table
+ * @tbl: controlling slot table
  *
  */
 void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl)
@@ -110,6 +110,8 @@ static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table  *tbl,
                slot->table = tbl;
                slot->slot_nr = slotid;
                slot->seq_nr = seq_init;
+               slot->seq_nr_highest_sent = seq_init;
+               slot->seq_nr_last_acked = seq_init - 1;
        }
        return slot;
 }
@@ -276,7 +278,8 @@ static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl,
        p = &tbl->slots;
        while (*p) {
                (*p)->seq_nr = ivalue;
-               (*p)->interrupted = 0;
+               (*p)->seq_nr_highest_sent = ivalue;
+               (*p)->seq_nr_last_acked = ivalue - 1;
                p = &(*p)->next;
        }
        tbl->highest_used_slotid = NFS4_NO_SLOT;
index 3c550f2975611945264f0dea39cbd79dbb0d8672..b996ee23f1baec77699e7e779a91d6c67299fb61 100644 (file)
@@ -10,7 +10,7 @@
 
 /* maximum number of slots to use */
 #define NFS4_DEF_SLOT_TABLE_SIZE (64U)
-#define NFS4_DEF_CB_SLOT_TABLE_SIZE (1U)
+#define NFS4_DEF_CB_SLOT_TABLE_SIZE (16U)
 #define NFS4_MAX_SLOT_TABLE (1024U)
 #define NFS4_NO_SLOT ((u32)-1)
 
@@ -23,8 +23,9 @@ struct nfs4_slot {
        unsigned long           generation;
        u32                     slot_nr;
        u32                     seq_nr;
-       unsigned int            interrupted : 1,
-                               privileged : 1,
+       u32                     seq_nr_last_acked;
+       u32                     seq_nr_highest_sent;
+       unsigned int            privileged : 1,
                                seq_done : 1;
 };
 
index 02488b50534ac0f18a59590a0213f8954b447127..3de36479ed7a1f652b488021673ace66bf668c0e 100644 (file)
@@ -563,6 +563,7 @@ static void nfs4_gc_state_owners(struct nfs_server *server)
  * nfs4_get_state_owner - Look up a state owner given a credential
  * @server: nfs_server to search
  * @cred: RPC credential to match
+ * @gfp_flags: allocation mode
  *
  * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
  */
index b4557cf685fbb965b4571bf35414649dd4503a58..cd1a5c08da9ad85f3298919389296349d409a0a8 100644 (file)
@@ -524,6 +524,31 @@ TRACE_EVENT(nfs4_setup_sequence,
                )
 );
 
+TRACE_EVENT(nfs4_xdr_status,
+               TP_PROTO(
+                       u32 op,
+                       int error
+               ),
+
+               TP_ARGS(op, error),
+
+               TP_STRUCT__entry(
+                       __field(u32, op)
+                       __field(int, error)
+               ),
+
+               TP_fast_assign(
+                       __entry->op = op;
+                       __entry->error = -error;
+               ),
+
+               TP_printk(
+                       "operation %d: nfs status %d (%s)",
+                       __entry->op,
+                       __entry->error, show_nfsv4_errors(__entry->error)
+               )
+);
+
 DECLARE_EVENT_CLASS(nfs4_open_event,
                TP_PROTO(
                        const struct nfs_open_context *ctx,
index 2fc8f6fa25e4b400553af4506f243ae1ada058e2..cfcabc33e24d01136ba00c336f90497f657fb0a5 100644 (file)
@@ -54,6 +54,7 @@
 #include <linux/nfs_fs.h>
 
 #include "nfs4_fs.h"
+#include "nfs4trace.h"
 #include "internal.h"
 #include "nfs4idmap.h"
 #include "nfs4session.h"
@@ -214,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
                                 nfs4_fattr_bitmap_maxsz)
 #define encode_read_maxsz      (op_encode_hdr_maxsz + \
                                 encode_stateid_maxsz + 3)
-#define decode_read_maxsz      (op_decode_hdr_maxsz + 2)
+#define decode_read_maxsz      (op_decode_hdr_maxsz + 2 + 1)
 #define encode_readdir_maxsz   (op_encode_hdr_maxsz + \
                                 2 + encode_verifier_maxsz + 5 + \
                                nfs4_label_maxsz)
 #define decode_readdir_maxsz   (op_decode_hdr_maxsz + \
-                                decode_verifier_maxsz)
+                                decode_verifier_maxsz + 1)
 #define encode_readlink_maxsz  (op_encode_hdr_maxsz)
-#define decode_readlink_maxsz  (op_decode_hdr_maxsz + 1)
+#define decode_readlink_maxsz  (op_decode_hdr_maxsz + 1 + 1)
 #define encode_write_maxsz     (op_encode_hdr_maxsz + \
                                 encode_stateid_maxsz + 4)
 #define decode_write_maxsz     (op_decode_hdr_maxsz + \
@@ -283,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
 #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
 #define encode_getacl_maxsz    (encode_getattr_maxsz)
 #define decode_getacl_maxsz    (op_decode_hdr_maxsz + \
-                                nfs4_fattr_bitmap_maxsz + 1)
+                                nfs4_fattr_bitmap_maxsz + 1 + 1)
 #define encode_setacl_maxsz    (op_encode_hdr_maxsz + \
                                 encode_stateid_maxsz + 3)
 #define decode_setacl_maxsz    (decode_setattr_maxsz)
 #define encode_fs_locations_maxsz \
                                (encode_getattr_maxsz)
 #define decode_fs_locations_maxsz \
-                               (0)
+                               (1)
 #define encode_secinfo_maxsz   (op_encode_hdr_maxsz + nfs4_name_maxsz)
 #define decode_secinfo_maxsz   (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
 
@@ -391,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
                                1 /* opaque devaddr4 length */ + \
                                  /* devaddr4 payload is read into page */ \
                                1 /* notification bitmap length */ + \
-                               1 /* notification bitmap, word 0 */)
+                               1 /* notification bitmap, word 0 */ + \
+                               1 /* possible XDR padding */)
 #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
                                encode_stateid_maxsz)
 #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
                                decode_stateid_maxsz + \
-                               XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
+                               XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
 #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz +          \
                                2 /* offset */ + \
                                2 /* length */ + \
@@ -1015,12 +1017,11 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
                                struct compound_hdr *hdr)
 {
        __be32 *p;
-       struct rpc_auth *auth = req->rq_cred->cr_auth;
 
        /* initialize running count of expected bytes in reply.
         * NOTE: the replied tag SHOULD be the same as the one sent,
         * but this is not required as a MUST for the server to do so. */
-       hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen;
+       hdr->replen = 3 + hdr->taglen;
 
        WARN_ON_ONCE(hdr->taglen > NFS4_MAXTAGLEN);
        encode_string(xdr, hdr->taglen, hdr->tag);
@@ -2340,9 +2341,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
        if (args->lg_args) {
                encode_layoutget(xdr, args->lg_args, &hdr);
-               xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
-                                args->lg_args->layout.pages,
-                                0, args->lg_args->layout.pglen);
+               rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0,
+                                       args->lg_args->layout.pglen,
+                                       hdr.replen);
        }
        encode_nops(&hdr);
 }
@@ -2386,9 +2387,9 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
        encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
        if (args->lg_args) {
                encode_layoutget(xdr, args->lg_args, &hdr);
-               xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
-                                args->lg_args->layout.pages,
-                                0, args->lg_args->layout.pglen);
+               rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0,
+                                       args->lg_args->layout.pglen,
+                                       hdr.replen);
        }
        encode_nops(&hdr);
 }
@@ -2498,8 +2499,8 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_putfh(xdr, args->fh, &hdr);
        encode_readlink(xdr, args, req, &hdr);
 
-       xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
-                       args->pgbase, args->pglen);
+       rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+                               args->pglen, hdr.replen);
        encode_nops(&hdr);
 }
 
@@ -2519,11 +2520,8 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_putfh(xdr, args->fh, &hdr);
        encode_readdir(xdr, args, req, &hdr);
 
-       xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
-                        args->pgbase, args->count);
-       dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
-                       __func__, hdr.replen << 2, args->pages,
-                       args->pgbase, args->count);
+       rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+                               args->count, hdr.replen);
        encode_nops(&hdr);
 }
 
@@ -2543,8 +2541,8 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_putfh(xdr, args->fh, &hdr);
        encode_read(xdr, args, &hdr);
 
-       xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
-                        args->pages, args->pgbase, args->count);
+       rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+                               args->count, hdr.replen);
        req->rq_rcv_buf.flags |= XDRBUF_READ;
        encode_nops(&hdr);
 }
@@ -2590,9 +2588,8 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_getattr(xdr, nfs4_acl_bitmap, NULL,
                        ARRAY_SIZE(nfs4_acl_bitmap), &hdr);
 
-       xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
-               args->acl_pages, 0, args->acl_len);
-
+       rpc_prepare_reply_pages(req, args->acl_pages, 0,
+                               args->acl_len, replen);
        encode_nops(&hdr);
 }
 
@@ -2813,9 +2810,8 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
                encode_fs_locations(xdr, args->bitmask, &hdr);
        }
 
-       /* Set up reply kvec to capture returned fs_locations array. */
-       xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
-                        (struct page **)&args->page, 0, PAGE_SIZE);
+       rpc_prepare_reply_pages(req, (struct page **)&args->page, 0,
+                               PAGE_SIZE, replen);
        encode_nops(&hdr);
 }
 
@@ -3017,10 +3013,8 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
 
        /* set up reply kvec. Subtract notification bitmap max size (2)
         * so that notification bitmap is put in xdr_buf tail */
-       xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2,
-                        args->pdev->pages, args->pdev->pgbase,
-                        args->pdev->pglen);
-
+       rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase,
+                               args->pdev->pglen, hdr.replen - 2);
        encode_nops(&hdr);
 }
 
@@ -3041,9 +3035,8 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
        encode_putfh(xdr, NFS_FH(args->inode), &hdr);
        encode_layoutget(xdr, args, &hdr);
 
-       xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
-           args->layout.pages, 0, args->layout.pglen);
-
+       rpc_prepare_reply_pages(req, args->layout.pages, 0,
+                               args->layout.pglen, hdr.replen);
        encode_nops(&hdr);
 }
 
@@ -3144,22 +3137,12 @@ static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req,
 }
 #endif /* CONFIG_NFS_V4_1 */
 
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
-       dprintk("nfs: %s: prematurely hit end of receive buffer. "
-               "Remaining buffer length is %tu words.\n",
-               func, xdr->end - xdr->p);
-}
-
 static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string)
 {
        ssize_t ret = xdr_stream_decode_opaque_inline(xdr, (void **)string,
                        NFS4_OPAQUE_LIMIT);
-       if (unlikely(ret < 0)) {
-               if (ret == -EBADMSG)
-                       print_overflow_msg(__func__, xdr);
+       if (unlikely(ret < 0))
                return -EIO;
-       }
        *len = ret;
        return 0;
 }
@@ -3170,22 +3153,19 @@ static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
 
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        hdr->status = be32_to_cpup(p++);
        hdr->taglen = be32_to_cpup(p);
 
        p = xdr_inline_decode(xdr, hdr->taglen + 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        hdr->tag = (char *)p;
        p += XDR_QUADLEN(hdr->taglen);
        hdr->nops = be32_to_cpup(p);
        if (unlikely(hdr->nops < 1))
                return nfs4_stat_to_errno(hdr->status);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected,
@@ -3201,11 +3181,14 @@ static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected,
        opnum = be32_to_cpup(p++);
        if (unlikely(opnum != expected))
                goto out_bad_operation;
+       if (unlikely(*p != cpu_to_be32(NFS_OK)))
+               goto out_status;
+       *nfs_retval = 0;
+       return true;
+out_status:
        nfserr = be32_to_cpup(p);
-       if (nfserr == NFS_OK)
-               *nfs_retval = 0;
-       else
-               *nfs_retval = nfs4_stat_to_errno(nfserr);
+       trace_nfs4_xdr_status(opnum, nfserr);
+       *nfs_retval = nfs4_stat_to_errno(nfserr);
        return true;
 out_bad_operation:
        dprintk("nfs: Server returned operation"
@@ -3214,7 +3197,6 @@ out_bad_operation:
        *nfs_retval = -EREMOTEIO;
        return false;
 out_overflow:
-       print_overflow_msg(__func__, xdr);
        *nfs_retval = -EIO;
        return false;
 }
@@ -3235,10 +3217,9 @@ static int decode_ace(struct xdr_stream *xdr, void *ace)
        char *str;
 
        p = xdr_inline_decode(xdr, 12);
-       if (likely(p))
-               return decode_opaque_inline(xdr, &strlen, &str);
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
+       if (unlikely(!p))
+               return -EIO;
+       return decode_opaque_inline(xdr, &strlen, &str);
 }
 
 static ssize_t
@@ -3249,10 +3230,9 @@ decode_bitmap4(struct xdr_stream *xdr, uint32_t *bitmap, size_t sz)
        ret = xdr_stream_decode_uint32_array(xdr, bitmap, sz);
        if (likely(ret >= 0))
                return ret;
-       if (ret == -EMSGSIZE)
-               return sz;
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
+       if (ret != -EMSGSIZE)
+               return -EIO;
+       return sz;
 }
 
 static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
@@ -3268,13 +3248,10 @@ static int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, unsigne
 
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        *attrlen = be32_to_cpup(p);
        *savep = xdr_stream_pos(xdr);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
@@ -3303,7 +3280,7 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
        if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                *type = be32_to_cpup(p);
                if (*type < NF4REG || *type > NF4NAMEDATTR) {
                        dprintk("%s: bad type %d\n", __func__, *type);
@@ -3314,9 +3291,6 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
        }
        dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]);
        return ret;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_fh_expire_type(struct xdr_stream *xdr,
@@ -3330,15 +3304,12 @@ static int decode_attr_fh_expire_type(struct xdr_stream *xdr,
        if (likely(bitmap[0] & FATTR4_WORD0_FH_EXPIRE_TYPE)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                *type = be32_to_cpup(p);
                bitmap[0] &= ~FATTR4_WORD0_FH_EXPIRE_TYPE;
        }
        dprintk("%s: expire type=0x%x\n", __func__, *type);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
@@ -3352,7 +3323,7 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
        if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, change);
                bitmap[0] &= ~FATTR4_WORD0_CHANGE;
                ret = NFS_ATTR_FATTR_CHANGE;
@@ -3360,9 +3331,6 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
        dprintk("%s: change attribute=%Lu\n", __func__,
                        (unsigned long long)*change);
        return ret;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
@@ -3376,16 +3344,13 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
        if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, size);
                bitmap[0] &= ~FATTR4_WORD0_SIZE;
                ret = NFS_ATTR_FATTR_SIZE;
        }
        dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size);
        return ret;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3398,15 +3363,12 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui
        if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                *res = be32_to_cpup(p);
                bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
        }
        dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true");
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3419,15 +3381,12 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
        if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                *res = be32_to_cpup(p);
                bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
        }
        dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true");
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
@@ -3442,7 +3401,7 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
        if (likely(bitmap[0] & FATTR4_WORD0_FSID)) {
                p = xdr_inline_decode(xdr, 16);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                p = xdr_decode_hyper(p, &fsid->major);
                xdr_decode_hyper(p, &fsid->minor);
                bitmap[0] &= ~FATTR4_WORD0_FSID;
@@ -3452,9 +3411,6 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
                        (unsigned long long)fsid->major,
                        (unsigned long long)fsid->minor);
        return ret;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3467,15 +3423,12 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint
        if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                *res = be32_to_cpup(p);
                bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME;
        }
        dprintk("%s: file size=%u\n", __func__, (unsigned int)*res);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t *res)
@@ -3487,14 +3440,11 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t *
        if (likely(bitmap[0] & FATTR4_WORD0_RDATTR_ERROR)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
                *res = -be32_to_cpup(p);
        }
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_exclcreat_supported(struct xdr_stream *xdr,
@@ -3526,13 +3476,13 @@ static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, stru
        if (likely(bitmap[0] & FATTR4_WORD0_FILEHANDLE)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                len = be32_to_cpup(p);
                if (len > NFS4_FHSIZE)
                        return -EIO;
                p = xdr_inline_decode(xdr, len);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                if (fh != NULL) {
                        memcpy(fh->data, p, len);
                        fh->size = len;
@@ -3540,9 +3490,6 @@ static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, stru
                bitmap[0] &= ~FATTR4_WORD0_FILEHANDLE;
        }
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3555,15 +3502,12 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
        if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                *res = be32_to_cpup(p);
                bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT;
        }
        dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -3577,16 +3521,13 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
        if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, fileid);
                bitmap[0] &= ~FATTR4_WORD0_FILEID;
                ret = NFS_ATTR_FATTR_FILEID;
        }
        dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
        return ret;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -3600,16 +3541,13 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
        if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, fileid);
                bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
                ret = NFS_ATTR_FATTR_MOUNTED_ON_FILEID;
        }
        dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
        return ret;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3623,15 +3561,12 @@ static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
        if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, res);
                bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL;
        }
        dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3645,15 +3580,12 @@ static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
        if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, res);
                bitmap[0] &= ~FATTR4_WORD0_FILES_FREE;
        }
        dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3667,15 +3599,12 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
        if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, res);
                bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL;
        }
        dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
@@ -3686,7 +3615,7 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
 
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        n = be32_to_cpup(p);
        if (n == 0)
                goto root_path;
@@ -3718,9 +3647,6 @@ out_eio:
        dprintk(" status %d", status);
        status = -EIO;
        goto out;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res)
@@ -3745,7 +3671,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
                goto out;
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               goto out_eio;
        n = be32_to_cpup(p);
        if (n <= 0)
                goto out_eio;
@@ -3758,7 +3684,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
                loc = &res->locations[res->nlocations];
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       goto out_eio;
                m = be32_to_cpup(p);
 
                dprintk("%s: servers:\n", __func__);
@@ -3796,8 +3722,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
 out:
        dprintk("%s: fs_locations done, error = %d\n", __func__, status);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
 out_eio:
        status = -EIO;
        goto out;
@@ -3814,15 +3738,12 @@ static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uin
        if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, res);
                bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE;
        }
        dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink)
@@ -3836,15 +3757,12 @@ static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
        if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                *maxlink = be32_to_cpup(p);
                bitmap[0] &= ~FATTR4_WORD0_MAXLINK;
        }
        dprintk("%s: maxlink=%u\n", __func__, *maxlink);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname)
@@ -3858,15 +3776,12 @@ static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
        if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                *maxname = be32_to_cpup(p);
                bitmap[0] &= ~FATTR4_WORD0_MAXNAME;
        }
        dprintk("%s: maxname=%u\n", __func__, *maxname);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3881,7 +3796,7 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
                uint64_t maxread;
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, &maxread);
                if (maxread > 0x7FFFFFFF)
                        maxread = 0x7FFFFFFF;
@@ -3890,9 +3805,6 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
        }
        dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3907,7 +3819,7 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
                uint64_t maxwrite;
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, &maxwrite);
                if (maxwrite > 0x7FFFFFFF)
                        maxwrite = 0x7FFFFFFF;
@@ -3916,9 +3828,6 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
        }
        dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode)
@@ -3933,7 +3842,7 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m
        if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                tmp = be32_to_cpup(p);
                *mode = tmp & ~S_IFMT;
                bitmap[1] &= ~FATTR4_WORD1_MODE;
@@ -3941,9 +3850,6 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m
        }
        dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode);
        return ret;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
@@ -3957,16 +3863,13 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
        if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                *nlink = be32_to_cpup(p);
                bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
                ret = NFS_ATTR_FATTR_NLINK;
        }
        dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink);
        return ret;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static ssize_t decode_nfs4_string(struct xdr_stream *xdr,
@@ -4011,10 +3914,9 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
                return NFS_ATTR_FATTR_OWNER;
        }
 out:
-       if (len != -EBADMSG)
-               return 0;
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
+       if (len == -EBADMSG)
+               return -EIO;
+       return 0;
 }
 
 static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
@@ -4046,10 +3948,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
                return NFS_ATTR_FATTR_GROUP;
        }
 out:
-       if (len != -EBADMSG)
-               return 0;
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
+       if (len == -EBADMSG)
+               return -EIO;
+       return 0;
 }
 
 static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
@@ -4066,7 +3967,7 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
 
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                major = be32_to_cpup(p++);
                minor = be32_to_cpup(p);
                tmp = MKDEV(major, minor);
@@ -4077,9 +3978,6 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
        }
        dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor);
        return ret;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -4093,15 +3991,12 @@ static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
        if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, res);
                bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL;
        }
        dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -4115,15 +4010,12 @@ static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
        if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, res);
                bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE;
        }
        dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -4137,15 +4029,12 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
        if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, res);
                bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL;
        }
        dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
@@ -4159,7 +4048,7 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
        if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, used);
                bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
                ret = NFS_ATTR_FATTR_SPACE_USED;
@@ -4167,9 +4056,6 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
        dprintk("%s: space used=%Lu\n", __func__,
                        (unsigned long long)*used);
        return ret;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static __be32 *
@@ -4189,12 +4075,9 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
 
        p = xdr_inline_decode(xdr, nfstime4_maxsz << 2);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        xdr_decode_nfstime4(p, time);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
@@ -4265,19 +4148,19 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
        if (likely(bitmap[2] & FATTR4_WORD2_SECURITY_LABEL)) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                lfs = be32_to_cpup(p++);
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                pi = be32_to_cpup(p++);
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                len = be32_to_cpup(p++);
                p = xdr_inline_decode(xdr, len);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                if (len < NFS4_MAXLABELLEN) {
                        if (label) {
                                memcpy(label->label, p, len);
@@ -4295,10 +4178,6 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
                dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__,
                        (char *)label->label, label->len, label->pi, label->lfs);
        return status;
-
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
@@ -4342,14 +4221,11 @@ static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *c
 
        p = xdr_inline_decode(xdr, 20);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        cinfo->atomic = be32_to_cpup(p++);
        p = xdr_decode_hyper(p, &cinfo->before);
        xdr_decode_hyper(p, &cinfo->after);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access)
@@ -4363,24 +4239,19 @@ static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access)
                return status;
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        supp = be32_to_cpup(p++);
        acc = be32_to_cpup(p);
        *supported = supp;
        *access = acc;
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
 {
        ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len);
-       if (unlikely(ret < 0)) {
-               print_overflow_msg(__func__, xdr);
+       if (unlikely(ret < 0))
                return -EIO;
-       }
        return 0;
 }
 
@@ -4460,13 +4331,11 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
                return status;
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        bmlen = be32_to_cpup(p);
        p = xdr_inline_decode(xdr, bmlen << 2);
        if (likely(p))
                return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
        return -EIO;
 }
 
@@ -4574,13 +4443,10 @@ static int decode_threshold_hint(struct xdr_stream *xdr,
        if (likely(bitmap[0] & hint_bit)) {
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                xdr_decode_hyper(p, res);
        }
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_first_threshold_item4(struct xdr_stream *xdr,
@@ -4593,10 +4459,8 @@ static int decode_first_threshold_item4(struct xdr_stream *xdr,
 
        /* layout type */
        p = xdr_inline_decode(xdr, 4);
-       if (unlikely(!p)) {
-               print_overflow_msg(__func__, xdr);
+       if (unlikely(!p))
                return -EIO;
-       }
        res->l_type = be32_to_cpup(p);
 
        /* thi_hintset bitmap */
@@ -4654,7 +4518,7 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
                        return -EREMOTEIO;
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                num = be32_to_cpup(p);
                if (num == 0)
                        return 0;
@@ -4667,9 +4531,6 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
                bitmap[2] &= ~FATTR4_WORD2_MDSTHRESHOLD;
        }
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
@@ -4857,7 +4718,7 @@ static int decode_pnfs_layout_types(struct xdr_stream *xdr,
 
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        fsinfo->nlayouttypes = be32_to_cpup(p);
 
        /* pNFS is not supported by the underlying file system */
@@ -4867,7 +4728,7 @@ static int decode_pnfs_layout_types(struct xdr_stream *xdr,
        /* Decode and set first layout type, move xdr->p past unused types */
        p = xdr_inline_decode(xdr, fsinfo->nlayouttypes * 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
 
        /* If we get too many, then just cap it at the max */
        if (fsinfo->nlayouttypes > NFS_MAX_LAYOUT_TYPES) {
@@ -4879,9 +4740,6 @@ static int decode_pnfs_layout_types(struct xdr_stream *xdr,
        for(i = 0; i < fsinfo->nlayouttypes; ++i)
                fsinfo->layouttype[i] = be32_to_cpup(p++);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 /*
@@ -4915,10 +4773,8 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
        *res = 0;
        if (bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE) {
                p = xdr_inline_decode(xdr, 4);
-               if (unlikely(!p)) {
-                       print_overflow_msg(__func__, xdr);
+               if (unlikely(!p))
                        return -EIO;
-               }
                *res = be32_to_cpup(p);
                bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE;
        }
@@ -4937,10 +4793,8 @@ static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
        *res = 0;
        if (bitmap[2] & FATTR4_WORD2_CLONE_BLKSIZE) {
                p = xdr_inline_decode(xdr, 4);
-               if (unlikely(!p)) {
-                       print_overflow_msg(__func__, xdr);
+               if (unlikely(!p))
                        return -EIO;
-               }
                *res = be32_to_cpup(p);
                bitmap[2] &= ~FATTR4_WORD2_CLONE_BLKSIZE;
        }
@@ -5016,19 +4870,16 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
 
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        len = be32_to_cpup(p);
        if (len > NFS4_FHSIZE)
                return -EIO;
        fh->size = len;
        p = xdr_inline_decode(xdr, len);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        memcpy(fh->data, p, len);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -5052,7 +4903,7 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
 
        p = xdr_inline_decode(xdr, 32); /* read 32 bytes */
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        p = xdr_decode_hyper(p, &offset); /* read 2 8-byte long words */
        p = xdr_decode_hyper(p, &length);
        type = be32_to_cpup(p++); /* 4 byte read */
@@ -5069,11 +4920,9 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
        p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */
        namelen = be32_to_cpup(p); /* read 4 bytes */  /* have read all 32 bytes now */
        p = xdr_inline_decode(xdr, namelen); /* variable size field */
-       if (likely(p))
-               return -NFS4ERR_DENIED;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
+       if (unlikely(!p)) /* overflow is the rare path, as in the other decoders */
+               return -EIO;
+       return -NFS4ERR_DENIED;
 }
 
 static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
@@ -5142,7 +4991,7 @@ static int decode_space_limit(struct xdr_stream *xdr,
 
        p = xdr_inline_decode(xdr, 12);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        limit_type = be32_to_cpup(p++);
        switch (limit_type) {
        case NFS4_LIMIT_SIZE:
@@ -5156,9 +5005,6 @@ static int decode_space_limit(struct xdr_stream *xdr,
        maxsize >>= PAGE_SHIFT;
        *pagemod_limit = min_t(u64, maxsize, ULONG_MAX);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_rw_delegation(struct xdr_stream *xdr,
@@ -5173,7 +5019,7 @@ static int decode_rw_delegation(struct xdr_stream *xdr,
                return status;
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        res->do_recall = be32_to_cpup(p);
 
        switch (delegation_type) {
@@ -5186,9 +5032,6 @@ static int decode_rw_delegation(struct xdr_stream *xdr,
                                return -EIO;
        }
        return decode_ace(xdr, NULL);
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -5198,7 +5041,7 @@ static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
 
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        why_no_delegation = be32_to_cpup(p);
        switch (why_no_delegation) {
                case WND4_CONTENTION:
@@ -5207,9 +5050,6 @@ static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
                        /* Ignore for now */
        }
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -5219,7 +5059,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
 
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        delegation_type = be32_to_cpup(p);
        res->delegation_type = 0;
        switch (delegation_type) {
@@ -5232,9 +5072,6 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
                return decode_no_delegation(xdr, res);
        }
        return -EIO;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -5256,7 +5093,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
 
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        res->rflags = be32_to_cpup(p++);
        bmlen = be32_to_cpup(p);
        if (bmlen > 10)
@@ -5264,7 +5101,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
 
        p = xdr_inline_decode(xdr, bmlen << 2);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
        for (i = 0; i < savewords; ++i)
                res->attrset[i] = be32_to_cpup(p++);
@@ -5275,9 +5112,6 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
 xdr_error:
        dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen);
        return -EIO;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
@@ -5326,7 +5160,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req,
                return status;
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        eof = be32_to_cpup(p++);
        count = be32_to_cpup(p);
        recvd = xdr_read_pages(xdr, count);
@@ -5339,9 +5173,6 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req,
        res->eof = eof;
        res->count = count;
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
@@ -5374,7 +5205,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
        /* Convert length of symlink */
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        len = be32_to_cpup(p);
        if (len >= rcvbuf->page_len || len <= 0) {
                dprintk("nfs: server returned giant symlink!\n");
@@ -5395,9 +5226,6 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
         */
        xdr_terminate_string(rcvbuf, len);
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -5500,7 +5328,6 @@ static int decode_setattr(struct xdr_stream *xdr)
                return status;
        if (decode_bitmap4(xdr, NULL, 0) >= 0)
                return 0;
-       print_overflow_msg(__func__, xdr);
        return -EIO;
 }
 
@@ -5512,7 +5339,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_re
 
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        opnum = be32_to_cpup(p++);
        if (opnum != OP_SETCLIENTID) {
                dprintk("nfs: decode_setclientid: Server returned operation"
@@ -5523,7 +5350,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_re
        if (nfserr == NFS_OK) {
                p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                p = xdr_decode_hyper(p, &res->clientid);
                memcpy(res->confirm.data, p, NFS4_VERIFIER_SIZE);
        } else if (nfserr == NFSERR_CLID_INUSE) {
@@ -5532,28 +5359,25 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_re
                /* skip netid string */
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                len = be32_to_cpup(p);
                p = xdr_inline_decode(xdr, len);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
 
                /* skip uaddr string */
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                len = be32_to_cpup(p);
                p = xdr_inline_decode(xdr, len);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                return -NFSERR_CLID_INUSE;
        } else
                return nfs4_stat_to_errno(nfserr);
 
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_setclientid_confirm(struct xdr_stream *xdr)
@@ -5572,13 +5396,10 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res)
 
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        res->count = be32_to_cpup(p++);
        res->verf->committed = be32_to_cpup(p++);
        return decode_write_verifier(xdr, &res->verf->verifier);
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_delegreturn(struct xdr_stream *xdr)
@@ -5594,30 +5415,24 @@ static int decode_secinfo_gss(struct xdr_stream *xdr,
 
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        oid_len = be32_to_cpup(p);
        if (oid_len > GSS_OID_MAX_LEN)
-               goto out_err;
+               return -EINVAL;
 
        p = xdr_inline_decode(xdr, oid_len);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        memcpy(flavor->flavor_info.oid.data, p, oid_len);
        flavor->flavor_info.oid.len = oid_len;
 
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        flavor->flavor_info.qop = be32_to_cpup(p++);
        flavor->flavor_info.service = be32_to_cpup(p);
 
        return 0;
-
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
-out_err:
-       return -EINVAL;
 }
 
 static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
@@ -5629,7 +5444,7 @@ static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res
 
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
 
        res->flavors->num_flavors = 0;
        num_flavors = be32_to_cpup(p);
@@ -5641,7 +5456,7 @@ static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res
 
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                sec_flavor->flavor = be32_to_cpup(p);
 
                if (sec_flavor->flavor == RPC_AUTH_GSS) {
@@ -5655,9 +5470,6 @@ static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res
        status = 0;
 out:
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
@@ -5711,11 +5523,11 @@ static int decode_exchange_id(struct xdr_stream *xdr,
 
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        xdr_decode_hyper(p, &res->clientid);
        p = xdr_inline_decode(xdr, 12);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        res->seqid = be32_to_cpup(p++);
        res->flags = be32_to_cpup(p++);
 
@@ -5739,7 +5551,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
        /* server_owner4.so_minor_id */
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        p = xdr_decode_hyper(p, &res->server_owner->minor_id);
 
        /* server_owner4.so_major_id */
@@ -5759,7 +5571,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
        /* Implementation Id */
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        impl_id_count = be32_to_cpup(p++);
 
        if (impl_id_count) {
@@ -5778,16 +5590,13 @@ static int decode_exchange_id(struct xdr_stream *xdr,
                /* nii_date */
                p = xdr_inline_decode(xdr, 12);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
                p = xdr_decode_hyper(p, &res->impl_id->date.seconds);
                res->impl_id->date.nseconds = be32_to_cpup(p);
 
                /* if there's more than one entry, ignore the rest */
        }
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_chan_attrs(struct xdr_stream *xdr,
@@ -5798,7 +5607,7 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
 
        p = xdr_inline_decode(xdr, 28);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        val = be32_to_cpup(p++);        /* headerpadsz */
        if (val)
                return -EINVAL;         /* no support for header padding yet */
@@ -5816,12 +5625,9 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
        if (nr_attrs == 1) {
                p = xdr_inline_decode(xdr, 4); /* skip rdma_attrs */
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
        }
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
@@ -5844,7 +5650,7 @@ static int decode_bind_conn_to_session(struct xdr_stream *xdr,
        /* dir flags, rdma mode bool */
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
 
        res->dir = be32_to_cpup(p++);
        if (res->dir == 0 || res->dir > NFS4_CDFS4_BOTH)
@@ -5855,9 +5661,6 @@ static int decode_bind_conn_to_session(struct xdr_stream *xdr,
                res->use_conn_in_rdma_mode = true;
 
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_create_session(struct xdr_stream *xdr,
@@ -5875,7 +5678,7 @@ static int decode_create_session(struct xdr_stream *xdr,
        /* seqid, flags */
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        res->seqid = be32_to_cpup(p++);
        res->flags = be32_to_cpup(p);
 
@@ -5884,9 +5687,6 @@ static int decode_create_session(struct xdr_stream *xdr,
        if (!status)
                status = decode_chan_attrs(xdr, &res->bc_attrs);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
@@ -5967,7 +5767,6 @@ out_err:
        res->sr_status = status;
        return status;
 out_overflow:
-       print_overflow_msg(__func__, xdr);
        status = -EIO;
        goto out_err;
 #else  /* CONFIG_NFS_V4_1 */
@@ -5995,7 +5794,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
                if (status == -ETOOSMALL) {
                        p = xdr_inline_decode(xdr, 4);
                        if (unlikely(!p))
-                               goto out_overflow;
+                               return -EIO;
                        pdev->mincount = be32_to_cpup(p);
                        dprintk("%s: Min count too small. mincnt = %u\n",
                                __func__, pdev->mincount);
@@ -6005,7 +5804,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
 
        p = xdr_inline_decode(xdr, 8);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        type = be32_to_cpup(p++);
        if (type != pdev->layout_type) {
                dprintk("%s: layout mismatch req: %u pdev: %u\n",
@@ -6019,19 +5818,19 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
         */
        pdev->mincount = be32_to_cpup(p);
        if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
-               goto out_overflow;
+               return -EIO;
 
        /* Parse notification bitmap, verifying that it is zero. */
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        len = be32_to_cpup(p);
        if (len) {
                uint32_t i;
 
                p = xdr_inline_decode(xdr, 4 * len);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
 
                res->notification = be32_to_cpup(p++);
                for (i = 1; i < len; i++) {
@@ -6043,9 +5842,6 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
                }
        }
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
@@ -6115,7 +5911,6 @@ out:
        res->status = status;
        return status;
 out_overflow:
-       print_overflow_msg(__func__, xdr);
        status = -EIO;
        goto out;
 }
@@ -6131,16 +5926,13 @@ static int decode_layoutreturn(struct xdr_stream *xdr,
                return status;
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        res->lrs_present = be32_to_cpup(p);
        if (res->lrs_present)
                status = decode_layout_stateid(xdr, &res->stateid);
        else
                nfs4_stateid_copy(&res->stateid, &invalid_stateid);
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_layoutcommit(struct xdr_stream *xdr,
@@ -6158,19 +5950,16 @@ static int decode_layoutcommit(struct xdr_stream *xdr,
 
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        sizechanged = be32_to_cpup(p);
 
        if (sizechanged) {
                /* throw away new size */
                p = xdr_inline_decode(xdr, 8);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EIO;
        }
        return 0;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 
 static int decode_test_stateid(struct xdr_stream *xdr,
@@ -6186,21 +5975,17 @@ static int decode_test_stateid(struct xdr_stream *xdr,
 
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        num_res = be32_to_cpup(p++);
        if (num_res != 1)
-               goto out;
+               return -EIO;
 
        p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EIO;
        res->status = be32_to_cpup(p++);
 
        return status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-out:
-       return -EIO;
 }
 
 static int decode_free_stateid(struct xdr_stream *xdr,
@@ -7570,11 +7355,11 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
        uint64_t new_cookie;
        __be32 *p = xdr_inline_decode(xdr, 4);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EAGAIN;
        if (*p == xdr_zero) {
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
-                       goto out_overflow;
+                       return -EAGAIN;
                if (*p == xdr_zero)
                        return -EAGAIN;
                entry->eof = 1;
@@ -7583,13 +7368,13 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 
        p = xdr_inline_decode(xdr, 12);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EAGAIN;
        p = xdr_decode_hyper(p, &new_cookie);
        entry->len = be32_to_cpup(p);
 
        p = xdr_inline_decode(xdr, entry->len);
        if (unlikely(!p))
-               goto out_overflow;
+               return -EAGAIN;
        entry->name = (const char *) p;
 
        /*
@@ -7601,14 +7386,14 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
        entry->fattr->valid = 0;
 
        if (decode_attr_bitmap(xdr, bitmap) < 0)
-               goto out_overflow;
+               return -EAGAIN;
 
        if (decode_attr_length(xdr, &len, &savep) < 0)
-               goto out_overflow;
+               return -EAGAIN;
 
        if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
                        NULL, entry->label, entry->server) < 0)
-               goto out_overflow;
+               return -EAGAIN;
        if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
                entry->ino = entry->fattr->mounted_on_fileid;
        else if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
@@ -7622,10 +7407,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
        entry->cookie = new_cookie;
 
        return 0;
-
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EAGAIN;
 }
 
 /*
@@ -7791,6 +7572,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
        PROC42(COPY,            enc_copy,               dec_copy),
        PROC42(OFFLOAD_CANCEL,  enc_offload_cancel,     dec_offload_cancel),
        PROC(LOOKUPP,           enc_lookupp,            dec_lookupp),
+       PROC42(LAYOUTERROR,     enc_layouterror,        dec_layouterror),
 };
 
 static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
index b60d5fbd7727507009dff6e15110ea20d54ed24f..a90b363500c22f76c6865d22c2db513ecbb9523f 100644 (file)
@@ -11,3 +11,4 @@
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter);
 EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_status);
index bd60f8d1e18102b04c37241eb31e0993cb16c348..a0d6910aa03a47b702f4c94f6c73e1b11c47c25c 100644 (file)
@@ -969,6 +969,91 @@ TRACE_EVENT(nfs_commit_done,
                )
 );
 
+TRACE_DEFINE_ENUM(NFS_OK);
+TRACE_DEFINE_ENUM(NFSERR_PERM);
+TRACE_DEFINE_ENUM(NFSERR_NOENT);
+TRACE_DEFINE_ENUM(NFSERR_IO);
+TRACE_DEFINE_ENUM(NFSERR_NXIO);
+TRACE_DEFINE_ENUM(NFSERR_ACCES);
+TRACE_DEFINE_ENUM(NFSERR_EXIST);
+TRACE_DEFINE_ENUM(NFSERR_XDEV);
+TRACE_DEFINE_ENUM(NFSERR_NODEV);
+TRACE_DEFINE_ENUM(NFSERR_NOTDIR);
+TRACE_DEFINE_ENUM(NFSERR_ISDIR);
+TRACE_DEFINE_ENUM(NFSERR_INVAL);
+TRACE_DEFINE_ENUM(NFSERR_FBIG);
+TRACE_DEFINE_ENUM(NFSERR_NOSPC);
+TRACE_DEFINE_ENUM(NFSERR_ROFS);
+TRACE_DEFINE_ENUM(NFSERR_MLINK);
+TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG);
+TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY);
+TRACE_DEFINE_ENUM(NFSERR_DQUOT);
+TRACE_DEFINE_ENUM(NFSERR_STALE);
+TRACE_DEFINE_ENUM(NFSERR_REMOTE);
+TRACE_DEFINE_ENUM(NFSERR_WFLUSH);
+TRACE_DEFINE_ENUM(NFSERR_BADHANDLE);
+TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC);
+TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE);
+TRACE_DEFINE_ENUM(NFSERR_NOTSUPP);
+TRACE_DEFINE_ENUM(NFSERR_TOOSMALL);
+TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT);
+TRACE_DEFINE_ENUM(NFSERR_BADTYPE);
+TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
+
+#define nfs_show_status(x) \
+       __print_symbolic(x, \
+                       { NFS_OK, "OK" }, \
+                       { NFSERR_PERM, "PERM" }, \
+                       { NFSERR_NOENT, "NOENT" }, \
+                       { NFSERR_IO, "IO" }, \
+                       { NFSERR_NXIO, "NXIO" }, \
+                       { NFSERR_ACCES, "ACCES" }, \
+                       { NFSERR_EXIST, "EXIST" }, \
+                       { NFSERR_XDEV, "XDEV" }, \
+                       { NFSERR_NODEV, "NODEV" }, \
+                       { NFSERR_NOTDIR, "NOTDIR" }, \
+                       { NFSERR_ISDIR, "ISDIR" }, \
+                       { NFSERR_INVAL, "INVAL" }, \
+                       { NFSERR_FBIG, "FBIG" }, \
+                       { NFSERR_NOSPC, "NOSPC" }, \
+                       { NFSERR_ROFS, "ROFS" }, \
+                       { NFSERR_MLINK, "MLINK" }, \
+                       { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \
+                       { NFSERR_NOTEMPTY, "NOTEMPTY" }, \
+                       { NFSERR_DQUOT, "DQUOT" }, \
+                       { NFSERR_STALE, "STALE" }, \
+                       { NFSERR_REMOTE, "REMOTE" }, \
+                       { NFSERR_WFLUSH, "WFLUSH" }, \
+                       { NFSERR_BADHANDLE, "BADHANDLE" }, \
+                       { NFSERR_NOT_SYNC, "NOTSYNC" }, \
+                       { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \
+                       { NFSERR_NOTSUPP, "NOTSUPP" }, \
+                       { NFSERR_TOOSMALL, "TOOSMALL" }, \
+                       { NFSERR_SERVERFAULT, "REMOTEIO" }, \
+                       { NFSERR_BADTYPE, "BADTYPE" }, \
+                       { NFSERR_JUKEBOX, "JUKEBOX" })
+
+TRACE_EVENT(nfs_xdr_status,
+               TP_PROTO(
+                       int error
+               ),
+
+               TP_ARGS(error),
+
+               TP_STRUCT__entry(
+                       __field(int, error)
+               ),
+
+               TP_fast_assign(
+                       __entry->error = error;
+               ),
+
+               TP_printk(
+                       "error=%d (%s)",
+                       __entry->error, nfs_show_status(__entry->error)
+               )
+);
+
 #endif /* _TRACE_NFS_H */
 
 #undef TRACE_INCLUDE_PATH
index e54d899c18481ab5c2ddb3ec596b8004f6dba4de..e9f39fa5964b0773cd5da3e39363710ca510d6f5 100644 (file)
@@ -350,7 +350,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
 
 /**
  * nfs_unlock_request - Unlock request and wake up sleepers.
- * @req:
+ * @req: pointer to request
  */
 void nfs_unlock_request(struct nfs_page *req)
 {
@@ -368,7 +368,7 @@ void nfs_unlock_request(struct nfs_page *req)
 
 /**
  * nfs_unlock_and_release_request - Unlock request and release the nfs_page
- * @req:
+ * @req: pointer to request
  */
 void nfs_unlock_and_release_request(struct nfs_page *req)
 {
@@ -531,7 +531,6 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
  * nfs_pgio_rpcsetup - Set up arguments for a pageio call
  * @hdr: The pageio hdr
  * @count: Number of bytes to read
- * @offset: Initial offset
  * @how: How to commit data (writes only)
  * @cinfo: Commit information for the call (writes only)
  */
@@ -634,7 +633,6 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
 
 /**
  * nfs_pgio_error - Clean up from a pageio error
- * @desc: IO descriptor
  * @hdr: pageio header
  */
 static void nfs_pgio_error(struct nfs_pgio_header *hdr)
@@ -768,8 +766,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
        pageused = 0;
        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
-               nfs_list_remove_request(req);
-               nfs_list_add_request(req, &hdr->pages);
+               nfs_list_move_request(req, &hdr->pages);
 
                if (!last_page || last_page != req->wb_page) {
                        pageused++;
@@ -893,6 +890,7 @@ static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
  * nfs_can_coalesce_requests - test two requests for compatibility
  * @prev: pointer to nfs_page
  * @req: pointer to nfs_page
+ * @pgio: pointer to nfs_pageio_descriptor
  *
  * The nfs_page structures 'prev' and 'req' are compared to ensure that the
  * page data area they describe is contiguous, and that their RPC
@@ -961,8 +959,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
        }
        if (!nfs_can_coalesce_requests(prev, req, desc))
                return 0;
-       nfs_list_remove_request(req);
-       nfs_list_add_request(req, &mirror->pg_list);
+       nfs_list_move_request(req, &mirror->pg_list);
        mirror->pg_count += req->wb_bytes;
        return 1;
 }
@@ -988,6 +985,16 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
        }
 }
 
+static void
+nfs_pageio_cleanup_request(struct nfs_pageio_descriptor *desc,
+               struct nfs_page *req)
+{
+       LIST_HEAD(head);
+
+       nfs_list_move_request(req, &head);
+       desc->pg_completion_ops->error_cleanup(&head, desc->pg_error);
+}
+
 /**
  * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
  * @desc: destination io descriptor
@@ -1025,10 +1032,8 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                        nfs_page_group_unlock(req);
                        desc->pg_moreio = 1;
                        nfs_pageio_doio(desc);
-                       if (desc->pg_error < 0)
-                               return 0;
-                       if (mirror->pg_recoalesce)
-                               return 0;
+                       if (desc->pg_error < 0 || mirror->pg_recoalesce)
+                               goto out_cleanup_subreq;
                        /* retry add_request for this subreq */
                        nfs_page_group_lock(req);
                        continue;
@@ -1061,6 +1066,10 @@ err_ptr:
        desc->pg_error = PTR_ERR(subreq);
        nfs_page_group_unlock(req);
        return 0;
+out_cleanup_subreq:
+       if (req != subreq)
+               nfs_pageio_cleanup_request(desc, subreq);
+       return 0;
 }
 
 static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
@@ -1079,7 +1088,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
                        struct nfs_page *req;
 
                        req = list_first_entry(&head, struct nfs_page, wb_list);
-                       nfs_list_remove_request(req);
                        if (__nfs_pageio_add_request(desc, req))
                                continue;
                        if (desc->pg_error < 0) {
@@ -1120,7 +1128,8 @@ static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc)
 
        for (midx = 0; midx < desc->pg_mirror_count; midx++) {
                mirror = &desc->pg_mirrors[midx];
-               desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
+               desc->pg_completion_ops->error_cleanup(&mirror->pg_list,
+                               desc->pg_error);
        }
 }
 
@@ -1168,11 +1177,14 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                if (nfs_pgio_has_mirroring(desc))
                        desc->pg_mirror_idx = midx;
                if (!nfs_pageio_add_request_mirror(desc, dupreq))
-                       goto out_failed;
+                       goto out_cleanup_subreq;
        }
 
        return 1;
 
+out_cleanup_subreq:
+       if (req != dupreq)
+               nfs_pageio_cleanup_request(desc, dupreq);
 out_failed:
        nfs_pageio_error_cleanup(desc);
        return 0;
@@ -1194,7 +1206,7 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
                desc->pg_mirror_idx = mirror_idx;
        for (;;) {
                nfs_pageio_doio(desc);
-               if (!mirror->pg_recoalesce)
+               if (desc->pg_error < 0 || !mirror->pg_recoalesce)
                        break;
                if (!nfs_do_recoalesce(desc))
                        break;
@@ -1222,9 +1234,8 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
        while (!list_empty(&hdr->pages)) {
                struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 
-               nfs_list_remove_request(req);
                if (!nfs_pageio_add_request(desc, req))
-                       nfs_list_add_request(req, &failed);
+                       nfs_list_move_request(req, &failed);
        }
        nfs_pageio_complete(desc);
        if (!list_empty(&failed)) {
index 53726da5c01008bb321cf600115da8290008b375..8247bd1634cb8c80bee4e4658a2e9754a0ebbf8b 100644 (file)
@@ -758,22 +758,35 @@ static int
 pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
                struct nfs_server *server,
                struct list_head *layout_list)
+       __must_hold(&clp->cl_lock)
+       __must_hold(RCU)
 {
        struct pnfs_layout_hdr *lo, *next;
        struct inode *inode;
 
        list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
-               if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
+               if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
+                   test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) ||
+                   !list_empty(&lo->plh_bulk_destroy))
                        continue;
+               /* If the sb is being destroyed, just bail */
+               if (!nfs_sb_active(server->super))
+                       break;
                inode = igrab(lo->plh_inode);
-               if (inode == NULL)
-                       continue;
-               list_del_init(&lo->plh_layouts);
-               if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
-                       continue;
-               rcu_read_unlock();
-               spin_unlock(&clp->cl_lock);
-               iput(inode);
+               if (inode != NULL) {
+                       list_del_init(&lo->plh_layouts);
+                       if (pnfs_layout_add_bulk_destroy_list(inode,
+                                               layout_list))
+                               continue;
+                       rcu_read_unlock();
+                       spin_unlock(&clp->cl_lock);
+                       iput(inode);
+               } else {
+                       rcu_read_unlock();
+                       spin_unlock(&clp->cl_lock);
+                       set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
+               }
+               nfs_sb_deactive(server->super);
                spin_lock(&clp->cl_lock);
                rcu_read_lock();
                return -EAGAIN;
@@ -811,7 +824,7 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
                /* Free all lsegs that are attached to commit buckets */
                nfs_commit_inode(inode, 0);
                pnfs_put_layout_hdr(lo);
-               iput(inode);
+               nfs_iput_and_deactive(inode);
        }
        return ret;
 }
index 5e80a07b7beac1d2c21177980c17164fd5d735d5..c0420b979d882cbf0245ecd22df897c3236eba57 100644 (file)
@@ -104,6 +104,7 @@ enum {
        NFS_LAYOUT_RETURN_REQUESTED,    /* Return this layout ASAP */
        NFS_LAYOUT_INVALID_STID,        /* layout stateid id is invalid */
        NFS_LAYOUT_FIRST_LAYOUTGET,     /* Serialize first layoutget */
+       NFS_LAYOUT_INODE_FREEING,       /* The inode is being freed */
 };
 
 enum layoutdriver_policy_flags {
@@ -349,6 +350,7 @@ void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nf
 void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *,
                             const struct nfs4_deviceid *);
 bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
+void nfs4_mark_deviceid_available(struct nfs4_deviceid_node *node);
 void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node);
 bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node);
 void nfs4_deviceid_purge_client(const struct nfs_client *);
index 7fb59487ee9049cd3705a3837eaf0296a02824d2..537b80d693f1ef8f9d71de664f9a87c5f0ca176a 100644 (file)
@@ -283,11 +283,23 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
 }
 EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node);
 
+void
+nfs4_mark_deviceid_available(struct nfs4_deviceid_node *node)
+{
+       if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
+               clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
+               smp_mb__after_atomic();
+       }
+}
+EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_available);
+
 void
 nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node)
 {
        node->timestamp_unavailable = jiffies;
+       smp_mb__before_atomic();
        set_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
+       smp_mb__after_atomic();
 }
 EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_unavailable);
 
@@ -302,6 +314,7 @@ nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node)
                if (time_in_range(node->timestamp_unavailable, start, end))
                        return true;
                clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
+               smp_mb__after_atomic();
        }
        return false;
 }
index f9f19784db8279e2cdd6f17cbc6c7bd39d5ad332..1d95a60b2586aede52c7a55b00fe76e921fdf7fe 100644 (file)
@@ -205,7 +205,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr,
 }
 
 static void
-nfs_async_read_error(struct list_head *head)
+nfs_async_read_error(struct list_head *head, int error)
 {
        struct nfs_page *req;
 
index 0570391eaa165cf306d6ec4b31379acfa807b0fb..23790c7b2289d21328db2a824eef5c6484e29089 100644 (file)
@@ -1919,7 +1919,7 @@ static int nfs_parse_devname(const char *dev_name,
                /* kill possible hostname list: not supported */
                comma = strchr(dev_name, ',');
                if (comma != NULL && comma < end)
-                       *comma = 0;
+                       len = comma - dev_name;
        }
 
        if (len > maxnamlen)
index 79b97b3c44275af32ea57d73f9fe7948c0551668..52d5339674850a95a7432444bfa1e65776d5ded0 100644 (file)
@@ -39,6 +39,7 @@ nfs_free_unlinkdata(struct nfs_unlinkdata *data)
 /**
  * nfs_async_unlink_done - Sillydelete post-processing
  * @task: rpc_task of the sillydelete
+ * @calldata: pointer to nfs_unlinkdata
  *
  * Do the directory attribute update.
  */
@@ -54,7 +55,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
 
 /**
  * nfs_async_unlink_release - Release the sillydelete data.
- * @task: rpc_task of the sillydelete
+ * @calldata: struct nfs_unlinkdata to release
  *
  * We need to call nfs_put_unlinkdata as a 'tk_release' task since the
  * rpc_task would be freed too.
@@ -159,8 +160,8 @@ static int nfs_call_unlink(struct dentry *dentry, struct inode *inode, struct nf
 
 /**
  * nfs_async_unlink - asynchronous unlinking of a file
- * @dir: parent directory of dentry
- * @dentry: dentry to unlink
+ * @dentry: parent directory of dentry
+ * @name: name of dentry to unlink
  */
 static int
 nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
@@ -324,6 +325,7 @@ static const struct rpc_call_ops nfs_rename_ops = {
  * @new_dir: target directory for the rename
  * @old_dentry: original dentry to be renamed
  * @new_dentry: dentry to which the old_dentry should be renamed
+ * @complete: Function to run on successful completion
  *
  * It's expected that valid references to the dentries and inodes are held
  */
index d09c9f878141a5c4213aef1faccb85b979bf4793..f3ebabaa291dccbf7fda8802bcee89c7fd3600da 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/iversion.h>
 
 #include <linux/uaccess.h>
+#include <linux/sched/mm.h>
 
 #include "delegation.h"
 #include "internal.h"
@@ -712,11 +713,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
        struct inode *inode = mapping->host;
        struct nfs_pageio_descriptor pgio;
-       struct nfs_io_completion *ioc = nfs_io_completion_alloc(GFP_NOFS);
+       struct nfs_io_completion *ioc;
+       unsigned int pflags = memalloc_nofs_save();
        int err;
 
        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
+       ioc = nfs_io_completion_alloc(GFP_NOFS);
        if (ioc)
                nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
 
@@ -727,6 +730,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
        nfs_pageio_complete(&pgio);
        nfs_io_completion_put(ioc);
 
+       memalloc_nofs_restore(pflags);
+
        if (err < 0)
                goto out_err;
        err = pgio.pg_error;
@@ -865,7 +870,6 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
 /**
  * nfs_request_add_commit_list - add request to a commit list
  * @req: pointer to a struct nfs_page
- * @dst: commit list head
  * @cinfo: holds list lock and accounting info
  *
  * This sets the PG_CLEAN bit, updates the cinfo count of
@@ -1412,20 +1416,27 @@ static void nfs_redirty_request(struct nfs_page *req)
        nfs_release_request(req);
 }
 
-static void nfs_async_write_error(struct list_head *head)
+static void nfs_async_write_error(struct list_head *head, int error)
 {
        struct nfs_page *req;
 
        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
+               if (nfs_error_is_fatal(error)) {
+                       nfs_context_set_write_error(req->wb_context, error);
+                       if (nfs_error_is_fatal_on_server(error)) {
+                               nfs_write_error_remove_page(req);
+                               continue;
+                       }
+               }
                nfs_redirty_request(req);
        }
 }
 
 static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
 {
-       nfs_async_write_error(&hdr->pages);
+       nfs_async_write_error(&hdr->pages, 0);
        filemap_fdatawrite_range(hdr->inode->i_mapping, hdr->args.offset,
                        hdr->args.offset + hdr->args.count - 1);
 }
index c74e4538d0ebe1e97536e7e7460228eb2580b152..a9d24d5a967c0b5ff2fdb333dae4ab28a610027c 100644 (file)
@@ -60,16 +60,6 @@ struct nfs4_cb_compound_hdr {
        int             status;
 };
 
-/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
-       dprintk("NFS: %s prematurely hit the end of our receive buffer. "
-               "Remaining buffer length is %tu words.\n",
-               func, xdr->end - xdr->p);
-}
-
 static __be32 *xdr_encode_empty_array(__be32 *p)
 {
        *p++ = xdr_zero;
@@ -240,7 +230,6 @@ static int decode_cb_op_status(struct xdr_stream *xdr,
        *status = nfs_cb_stat_to_errno(be32_to_cpup(p));
        return 0;
 out_overflow:
-       print_overflow_msg(__func__, xdr);
        return -EIO;
 out_unexpected:
        dprintk("NFSD: Callback server returned operation %d but "
@@ -309,7 +298,6 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
        hdr->nops = be32_to_cpup(p);
        return 0;
 out_overflow:
-       print_overflow_msg(__func__, xdr);
        return -EIO;
 }
 
@@ -437,7 +425,6 @@ out:
        cb->cb_seq_status = status;
        return status;
 out_overflow:
-       print_overflow_msg(__func__, xdr);
        status = -EIO;
        goto out;
 }
index 1b06f0b284533b025031bd198d244ececfe9b675..22494d1706195d01715d6173c1385fc227eea1e4 100644 (file)
@@ -538,6 +538,7 @@ enum {
        NFSPROC4_CLNT_OFFLOAD_CANCEL,
 
        NFSPROC4_CLNT_LOOKUPP,
+       NFSPROC4_CLNT_LAYOUTERROR,
 };
 
 /* nfs41 types */
index 6aa8cc83c3b66317ca0fd20e26d591b7f29654f8..c827d31298cc379e3bdb07810f98913b24737301 100644 (file)
@@ -261,5 +261,6 @@ struct nfs_server {
 #define NFS_CAP_CLONE          (1U << 23)
 #define NFS_CAP_COPY           (1U << 24)
 #define NFS_CAP_OFFLOAD_CANCEL (1U << 25)
+#define NFS_CAP_LAYOUTERROR    (1U << 26)
 
 #endif
index e27572d30d97751ba70a9c5d753997faaac49d51..ad69430fd0eb5a9123727e2054682971de3feca3 100644 (file)
@@ -164,6 +164,16 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head)
        list_add_tail(&req->wb_list, head);
 }
 
+/**
+ * nfs_list_move_request - Move a request to a new list
+ * @req: request to move; it is relinked through its wb_list member
+ * @head: head of list into which to insert the request.
+ *
+ * Thin wrapper around list_move_tail().
+ */
+static inline void
+nfs_list_move_request(struct nfs_page *req, struct list_head *head)
+{
+       list_move_tail(&req->wb_list, head);
+}
 
 /**
  * nfs_list_remove_request - Remove a request from its wb_list
index 441a93ebcac045132113cf0314f22acd4da755d0..9b8324ec08f3191f7ff95944ed0d0cb07df67194 100644 (file)
@@ -383,6 +383,41 @@ struct nfs42_layoutstat_data {
        struct nfs42_layoutstat_res res;
 };
 
+/*
+ * One error report: a device id, a status and an operation number.
+ * NOTE(review): looks like the in-kernel form of the NFSv4.2 device_error4
+ * (RFC 7862) -- confirm against the XDR encoder.
+ */
+struct nfs42_device_error {
+       struct nfs4_deviceid dev_id;
+       int status;
+       enum nfs_opnum4 opnum;
+};
+
+/*
+ * A byte range (offset/length) and stateid together with a device error.
+ * errors[] is a fixed one-element array rather than a flexible array
+ * member; struct nfs42_layouterror_args embeds this struct in an array,
+ * which a flexible array member would not allow.
+ */
+struct nfs42_layout_error {
+       __u64 offset;
+       __u64 length;
+       nfs4_stateid stateid;
+       struct nfs42_device_error errors[1];
+};
+
+/* Capacity of the errors[] array in struct nfs42_layouterror_args. */
+#define NFS42_LAYOUTERROR_MAX 5
+
+/*
+ * Call arguments carrying up to NFS42_LAYOUTERROR_MAX layout errors.
+ * NOTE(review): num_errors presumably counts the valid entries at the
+ * front of errors[] -- confirm with the callers that fill this in.
+ */
+struct nfs42_layouterror_args {
+       struct nfs4_sequence_args seq_args;
+       struct inode *inode;
+       unsigned int num_errors;
+       struct nfs42_layout_error errors[NFS42_LAYOUTERROR_MAX];
+};
+
+/*
+ * Result counterpart of struct nfs42_layouterror_args.
+ * NOTE(review): rpc_status presumably holds the RPC-level completion
+ * status of the call -- confirm where it is assigned.
+ */
+struct nfs42_layouterror_res {
+       struct nfs4_sequence_res seq_res;
+       unsigned int num_errors;
+       int rpc_status;
+};
+
+/*
+ * Bundles the arguments and results of one layout-error call with the
+ * inode and layout segment it refers to.
+ * NOTE(review): whether references are held on inode/lseg for the life of
+ * this structure is not visible here -- confirm in the alloc/free paths.
+ */
+struct nfs42_layouterror_data {
+       struct nfs42_layouterror_args args;
+       struct nfs42_layouterror_res res;
+       struct inode *inode;
+       struct pnfs_layout_segment *lseg;
+};
+
 struct nfs42_clone_args {
        struct nfs4_sequence_args       seq_args;
        struct nfs_fh                   *src_fh;
@@ -1549,7 +1584,7 @@ struct nfs_commit_data {
 };
 
 struct nfs_pgio_completion_ops {
-       void    (*error_cleanup)(struct list_head *head);
+       void    (*error_cleanup)(struct list_head *head, int);
        void    (*init_hdr)(struct nfs_pgio_header *hdr);
        void    (*completion)(struct nfs_pgio_header *hdr);
        void    (*reschedule_io)(struct nfs_pgio_header *hdr);
index eed3cb16ccf1236cfed4a8b505ae18e15c96166b..5f9076fdb0901fa50c132426688bc22db0805781 100644 (file)
@@ -74,14 +74,12 @@ struct rpc_cred_cache;
 struct rpc_authops;
 struct rpc_auth {
        unsigned int            au_cslack;      /* call cred size estimate */
-                               /* guess at number of u32's auth adds before
-                                * reply data; normally the verifier size: */
-       unsigned int            au_rslack;
-                               /* for gss, used to calculate au_rslack: */
-       unsigned int            au_verfsize;
-
-       unsigned int            au_flags;       /* various flags */
-       const struct rpc_authops *au_ops;               /* operations */
+       unsigned int            au_rslack;      /* reply cred size estimate */
+       unsigned int            au_verfsize;    /* size of reply verifier */
+       unsigned int            au_ralign;      /* words before UL header */
+
+       unsigned int            au_flags;
+       const struct rpc_authops *au_ops;
        rpc_authflavor_t        au_flavor;      /* pseudoflavor (note may
                                                 * differ from the flavor in
                                                 * au_ops->au_flavor in gss
@@ -131,13 +129,15 @@ struct rpc_credops {
        void                    (*crdestroy)(struct rpc_cred *);
 
        int                     (*crmatch)(struct auth_cred *, struct rpc_cred *, int);
-       __be32 *                (*crmarshal)(struct rpc_task *, __be32 *);
+       int                     (*crmarshal)(struct rpc_task *task,
+                                            struct xdr_stream *xdr);
        int                     (*crrefresh)(struct rpc_task *);
-       __be32 *                (*crvalidate)(struct rpc_task *, __be32 *);
-       int                     (*crwrap_req)(struct rpc_task *, kxdreproc_t,
-                                               void *, __be32 *, void *);
-       int                     (*crunwrap_resp)(struct rpc_task *, kxdrdproc_t,
-                                               void *, __be32 *, void *);
+       int                     (*crvalidate)(struct rpc_task *task,
+                                             struct xdr_stream *xdr);
+       int                     (*crwrap_req)(struct rpc_task *task,
+                                             struct xdr_stream *xdr);
+       int                     (*crunwrap_resp)(struct rpc_task *task,
+                                                struct xdr_stream *xdr);
        int                     (*crkey_timeout)(struct rpc_cred *);
        char *                  (*crstringify_acceptor)(struct rpc_cred *);
        bool                    (*crneed_reencode)(struct rpc_task *);
@@ -165,10 +165,18 @@ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *
 void                   rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
 struct rpc_cred *      rpcauth_lookupcred(struct rpc_auth *, int);
 void                   put_rpccred(struct rpc_cred *);
-__be32 *               rpcauth_marshcred(struct rpc_task *, __be32 *);
-__be32 *               rpcauth_checkverf(struct rpc_task *, __be32 *);
-int                    rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
-int                    rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
+int                    rpcauth_marshcred(struct rpc_task *task,
+                                         struct xdr_stream *xdr);
+int                    rpcauth_checkverf(struct rpc_task *task,
+                                         struct xdr_stream *xdr);
+int                    rpcauth_wrap_req_encode(struct rpc_task *task,
+                                               struct xdr_stream *xdr);
+int                    rpcauth_wrap_req(struct rpc_task *task,
+                                        struct xdr_stream *xdr);
+int                    rpcauth_unwrap_resp_decode(struct rpc_task *task,
+                                                  struct xdr_stream *xdr);
+int                    rpcauth_unwrap_resp(struct rpc_task *task,
+                                           struct xdr_stream *xdr);
 bool                   rpcauth_xmit_need_reencode(struct rpc_task *task);
 int                    rpcauth_refreshcred(struct rpc_task *);
 void                   rpcauth_invalcred(struct rpc_task *);
index 1c441714d569bcb46ecae21cf96c882a7a235502..98bc9883b23096430cf7bcd36d97ea374cb88e69 100644 (file)
@@ -169,6 +169,9 @@ int         rpcb_v4_register(struct net *net, const u32 program,
                                 const char *netid);
 void           rpcb_getport_async(struct rpc_task *);
 
+void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
+                            unsigned int base, unsigned int len,
+                            unsigned int hdrsize);
 void           rpc_call_start(struct rpc_task *);
 int            rpc_call_async(struct rpc_clnt *clnt,
                               const struct rpc_message *msg, int flags,
index ec6234eee89c47ec1330c108d87daa3b33db1740..981c89cef19d6a0fd2097b95ec2b0a1c797aad74 100644 (file)
@@ -1,4 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Dumb way to share this static piece of information with nfsd
+ * Define the string that exports the set of kernel-supported
+ * Kerberos enctypes. This list is sent via upcall to gssd, and
+ * is also exposed via the nfsd /proc API. The consumers generally
+ * treat this as an ordered list, where the first item in the list
+ * is the most preferred.
+ */
+
+#ifndef _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H
+#define _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H
+
+#ifdef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
+
+/*
+ * NB: This list includes encryption types that were deprecated
+ * by RFC 8429 (DES3_CBC_SHA1 and ARCFOUR_HMAC).
+ *
+ * ENCTYPE_AES256_CTS_HMAC_SHA1_96
+ * ENCTYPE_AES128_CTS_HMAC_SHA1_96
+ * ENCTYPE_DES3_CBC_SHA1
+ * ENCTYPE_ARCFOUR_HMAC
+ */
+#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23"
+
+#else  /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
+
+/*
+ * NB: This list includes encryption types that were deprecated
+ * by RFC 8429 and RFC 6649.
+ *
+ * ENCTYPE_AES256_CTS_HMAC_SHA1_96
+ * ENCTYPE_AES128_CTS_HMAC_SHA1_96
+ * ENCTYPE_DES3_CBC_SHA1
+ * ENCTYPE_ARCFOUR_HMAC
+ * ENCTYPE_DES_CBC_MD5
+ * ENCTYPE_DES_CBC_CRC
+ * ENCTYPE_DES_CBC_MD4
  */
 #define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2"
+
+#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
+
+#endif /* _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H */
index 219aa3910a0c10fa6013c7df83013b5802d186ee..ec861cd0cfe8ce9fa5425909d243c660231f06c9 100644 (file)
@@ -97,6 +97,7 @@ typedef void                  (*rpc_action)(struct rpc_task *);
 
 struct rpc_call_ops {
        void (*rpc_call_prepare)(struct rpc_task *, void *);
+       void (*rpc_call_prepare_transmit)(struct rpc_task *, void *);
        void (*rpc_call_done)(struct rpc_task *, void *);
        void (*rpc_count_stats)(struct rpc_task *, void *);
        void (*rpc_release)(void *);
@@ -303,4 +304,12 @@ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
 }
 #endif /* CONFIG_SUNRPC_SWAP */
 
+/*
+ * rpc_task_need_resched - test whether @task is queued or has a callback
+ * @task: task to inspect
+ *
+ * Returns true when RPC_IS_QUEUED() holds for @task or when
+ * task->tk_callback is set, false otherwise.
+ */
+static inline bool
+rpc_task_need_resched(const struct rpc_task *task)
+{
+       return RPC_IS_QUEUED(task) || task->tk_callback;
+}
+
 #endif /* _LINUX_SUNRPC_SCHED_H_ */
index 2ec1280602390efe6e5c71413a7a731f6e98f398..9ee3970ba59c31385e3b20a277d560a48bee3fc1 100644 (file)
@@ -87,6 +87,16 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
 #define        xdr_one         cpu_to_be32(1)
 #define        xdr_two         cpu_to_be32(2)
 
+#define        rpc_auth_null   cpu_to_be32(RPC_AUTH_NULL)
+#define        rpc_auth_unix   cpu_to_be32(RPC_AUTH_UNIX)
+#define        rpc_auth_short  cpu_to_be32(RPC_AUTH_SHORT)
+#define        rpc_auth_gss    cpu_to_be32(RPC_AUTH_GSS)
+
+#define        rpc_call        cpu_to_be32(RPC_CALL)
+#define        rpc_reply       cpu_to_be32(RPC_REPLY)
+
+#define        rpc_msg_accepted        cpu_to_be32(RPC_MSG_ACCEPTED)
+
 #define        rpc_success             cpu_to_be32(RPC_SUCCESS)
 #define        rpc_prog_unavail        cpu_to_be32(RPC_PROG_UNAVAIL)
 #define        rpc_prog_mismatch       cpu_to_be32(RPC_PROG_MISMATCH)
@@ -95,6 +105,9 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
 #define        rpc_system_err          cpu_to_be32(RPC_SYSTEM_ERR)
 #define        rpc_drop_reply          cpu_to_be32(RPC_DROP_REPLY)
 
+#define        rpc_mismatch            cpu_to_be32(RPC_MISMATCH)
+#define        rpc_auth_error          cpu_to_be32(RPC_AUTH_ERROR)
+
 #define        rpc_auth_ok             cpu_to_be32(RPC_AUTH_OK)
 #define        rpc_autherr_badcred     cpu_to_be32(RPC_AUTH_BADCRED)
 #define        rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED)
@@ -103,7 +116,6 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
 #define        rpc_autherr_tooweak     cpu_to_be32(RPC_AUTH_TOOWEAK)
 #define        rpcsec_gsserr_credproblem       cpu_to_be32(RPCSEC_GSS_CREDPROBLEM)
 #define        rpcsec_gsserr_ctxproblem        cpu_to_be32(RPCSEC_GSS_CTXPROBLEM)
-#define        rpc_autherr_oldseqnum   cpu_to_be32(101)
 
 /*
  * Miscellaneous XDR helper functions
@@ -167,7 +179,6 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
 extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
-extern void xdr_buf_trim(struct xdr_buf *, unsigned int);
 extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int);
 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
@@ -217,6 +228,8 @@ struct xdr_stream {
        struct kvec scratch;    /* Scratch buffer */
        struct page **page_ptr; /* pointer to the current page */
        unsigned int nwords;    /* Remaining decode buffer length */
+
+       struct rpc_rqst *rqst;  /* For debugging */
 };
 
 /*
@@ -227,7 +240,8 @@ typedef void        (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 typedef int    (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
                void *obj);
 
-extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
+                           __be32 *p, struct rpc_rqst *rqst);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_commit_encode(struct xdr_stream *xdr);
 extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
@@ -235,7 +249,8 @@ extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
                unsigned int base, unsigned int len);
 extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);
-extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf,
+                           __be32 *p, struct rpc_rqst *rqst);
 extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
                struct page **pages, unsigned int len);
 extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
index ad7e910b119dff15bf09af1e614577e05499c9d3..3a391544299e8dc1c39f85cb4d81ff491391c7d5 100644 (file)
@@ -196,8 +196,6 @@ struct rpc_xprt {
 
        size_t                  max_payload;    /* largest RPC payload size,
                                                   in bytes */
-       unsigned int            tsh_size;       /* size of transport specific
-                                                  header */
 
        struct rpc_wait_queue   binding;        /* requests waiting on rpcbind */
        struct rpc_wait_queue   sending;        /* requests waiting to send */
@@ -362,11 +360,6 @@ struct rpc_xprt *  xprt_alloc(struct net *net, size_t size,
                                unsigned int max_req);
 void                   xprt_free(struct rpc_xprt *);
 
-static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
-{
-       return p + xprt->tsh_size;
-}
-
 static inline int
 xprt_enable_swap(struct rpc_xprt *xprt)
 {
index 458bfe0137f5ec818dca7f1da7deb264940652d8..b81d0b3e0799f43c67057bdbbdb20467205f96be 100644 (file)
@@ -26,6 +26,7 @@ struct sock_xprt {
         */
        struct socket *         sock;
        struct sock *           inet;
+       struct file *           file;
 
        /*
         * State of TCP reply receive
diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h
new file mode 100644 (file)
index 0000000..d1f7fe1
--- /dev/null
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 Oracle.  All rights reserved.
+ *
+ * Trace point definitions for the "rpcgss" subsystem.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rpcgss
+
+/* Include guard: must test the same symbol that is defined just below. */
+#if !defined(_TRACE_RPCGSS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RPCGSS_H
+
+#include <linux/tracepoint.h>
+
+/**
+ ** GSS-API related trace events
+ **/
+
+TRACE_DEFINE_ENUM(GSS_S_BAD_MECH);
+TRACE_DEFINE_ENUM(GSS_S_BAD_NAME);
+TRACE_DEFINE_ENUM(GSS_S_BAD_NAMETYPE);
+TRACE_DEFINE_ENUM(GSS_S_BAD_BINDINGS);
+TRACE_DEFINE_ENUM(GSS_S_BAD_STATUS);
+TRACE_DEFINE_ENUM(GSS_S_BAD_SIG);
+TRACE_DEFINE_ENUM(GSS_S_NO_CRED);
+TRACE_DEFINE_ENUM(GSS_S_NO_CONTEXT);
+TRACE_DEFINE_ENUM(GSS_S_DEFECTIVE_TOKEN);
+TRACE_DEFINE_ENUM(GSS_S_DEFECTIVE_CREDENTIAL);
+TRACE_DEFINE_ENUM(GSS_S_CREDENTIALS_EXPIRED);
+TRACE_DEFINE_ENUM(GSS_S_CONTEXT_EXPIRED);
+TRACE_DEFINE_ENUM(GSS_S_FAILURE);
+TRACE_DEFINE_ENUM(GSS_S_BAD_QOP);
+TRACE_DEFINE_ENUM(GSS_S_UNAUTHORIZED);
+TRACE_DEFINE_ENUM(GSS_S_UNAVAILABLE);
+TRACE_DEFINE_ENUM(GSS_S_DUPLICATE_ELEMENT);
+TRACE_DEFINE_ENUM(GSS_S_NAME_NOT_MN);
+TRACE_DEFINE_ENUM(GSS_S_CONTINUE_NEEDED);
+TRACE_DEFINE_ENUM(GSS_S_DUPLICATE_TOKEN);
+TRACE_DEFINE_ENUM(GSS_S_OLD_TOKEN);
+TRACE_DEFINE_ENUM(GSS_S_UNSEQ_TOKEN);
+TRACE_DEFINE_ENUM(GSS_S_GAP_TOKEN);
+
+/*
+ * Render a GSS major status as a "|"-separated list of the flag names
+ * below, using __print_flags().
+ */
+#define show_gss_status(x)                                             \
+       __print_flags(x, "|",                                           \
+               { GSS_S_BAD_MECH, "GSS_S_BAD_MECH" },                   \
+               { GSS_S_BAD_NAME, "GSS_S_BAD_NAME" },                   \
+               { GSS_S_BAD_NAMETYPE, "GSS_S_BAD_NAMETYPE" },           \
+               { GSS_S_BAD_BINDINGS, "GSS_S_BAD_BINDINGS" },           \
+               { GSS_S_BAD_STATUS, "GSS_S_BAD_STATUS" },               \
+               { GSS_S_BAD_SIG, "GSS_S_BAD_SIG" },                     \
+               { GSS_S_NO_CRED, "GSS_S_NO_CRED" },                     \
+               { GSS_S_NO_CONTEXT, "GSS_S_NO_CONTEXT" },               \
+               { GSS_S_DEFECTIVE_TOKEN, "GSS_S_DEFECTIVE_TOKEN" },     \
+               { GSS_S_DEFECTIVE_CREDENTIAL, "GSS_S_DEFECTIVE_CREDENTIAL" }, \
+               { GSS_S_CREDENTIALS_EXPIRED, "GSS_S_CREDENTIALS_EXPIRED" }, \
+               { GSS_S_CONTEXT_EXPIRED, "GSS_S_CONTEXT_EXPIRED" },     \
+               { GSS_S_FAILURE, "GSS_S_FAILURE" },                     \
+               { GSS_S_BAD_QOP, "GSS_S_BAD_QOP" },                     \
+               { GSS_S_UNAUTHORIZED, "GSS_S_UNAUTHORIZED" },           \
+               { GSS_S_UNAVAILABLE, "GSS_S_UNAVAILABLE" },             \
+               { GSS_S_DUPLICATE_ELEMENT, "GSS_S_DUPLICATE_ELEMENT" }, \
+               { GSS_S_NAME_NOT_MN, "GSS_S_NAME_NOT_MN" },             \
+               { GSS_S_CONTINUE_NEEDED, "GSS_S_CONTINUE_NEEDED" },     \
+               { GSS_S_DUPLICATE_TOKEN, "GSS_S_DUPLICATE_TOKEN" },     \
+               { GSS_S_OLD_TOKEN, "GSS_S_OLD_TOKEN" },                 \
+               { GSS_S_UNSEQ_TOKEN, "GSS_S_UNSEQ_TOKEN" },             \
+               { GSS_S_GAP_TOKEN, "GSS_S_GAP_TOKEN" })
+
+
+/*
+ * Event class shared by the GSS-API call events.  Records the task id
+ * (tk_pid), the client id (cl_clid) and the major status.  A major status
+ * of 0 is printed as "GSS_S_COMPLETE"; any other value is decoded with
+ * show_gss_status().
+ */
+DECLARE_EVENT_CLASS(rpcgss_gssapi_event,
+       TP_PROTO(
+               const struct rpc_task *task,
+               u32 maj_stat
+       ),
+
+       TP_ARGS(task, maj_stat),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, maj_stat)
+
+       ),
+
+       TP_fast_assign(
+               __entry->task_id = task->tk_pid;
+               __entry->client_id = task->tk_client->cl_clid;
+               __entry->maj_stat = maj_stat;
+       ),
+
+       TP_printk("task:%u@%u maj_stat=%s",
+               __entry->task_id, __entry->client_id,
+               __entry->maj_stat == 0 ?
+               "GSS_S_COMPLETE" : show_gss_status(__entry->maj_stat))
+);
+
+/* Define event rpcgss_<name> as an instance of class rpcgss_gssapi_event. */
+#define DEFINE_GSSAPI_EVENT(name)                                      \
+       DEFINE_EVENT(rpcgss_gssapi_event, rpcgss_##name,                \
+                       TP_PROTO(                                       \
+                               const struct rpc_task *task,            \
+                               u32 maj_stat                            \
+                       ),                                              \
+                       TP_ARGS(task, maj_stat))
+
+/* Records a single integer status; no task or client is attached. */
+TRACE_EVENT(rpcgss_import_ctx,
+       TP_PROTO(
+               int status
+       ),
+
+       TP_ARGS(status),
+
+       TP_STRUCT__entry(
+               __field(int, status)
+       ),
+
+       TP_fast_assign(
+               __entry->status = status;
+       ),
+
+       TP_printk("status=%d", __entry->status)
+);
+
+DEFINE_GSSAPI_EVENT(get_mic);
+DEFINE_GSSAPI_EVENT(verify_mic);
+DEFINE_GSSAPI_EVENT(wrap);
+DEFINE_GSSAPI_EVENT(unwrap);
+
+
+/**
+ ** GSS auth unwrap failures
+ **/
+
+/* Carries only the task id and client id; no failure detail is recorded. */
+TRACE_EVENT(rpcgss_unwrap_failed,
+       TP_PROTO(
+               const struct rpc_task *task
+       ),
+
+       TP_ARGS(task),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+       ),
+
+       TP_fast_assign(
+               __entry->task_id = task->tk_pid;
+               __entry->client_id = task->tk_client->cl_clid;
+       ),
+
+       TP_printk("task:%u@%u", __entry->task_id, __entry->client_id)
+);
+
+/*
+ * Records the expected and the received sequence number, as supplied by
+ * the caller, alongside the task id and client id.
+ */
+TRACE_EVENT(rpcgss_bad_seqno,
+       TP_PROTO(
+               const struct rpc_task *task,
+               u32 expected,
+               u32 received
+       ),
+
+       TP_ARGS(task, expected, received),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, expected)
+               __field(u32, received)
+       ),
+
+       TP_fast_assign(
+               __entry->task_id = task->tk_pid;
+               __entry->client_id = task->tk_client->cl_clid;
+               __entry->expected = expected;
+               __entry->received = received;
+       ),
+
+       TP_printk("task:%u@%u expected seqno %u, received seqno %u",
+               __entry->task_id, __entry->client_id,
+               __entry->expected, __entry->received)
+);
+
+/*
+ * Records the XID (converted to CPU byte order) and rq_seqno of the
+ * request attached to @task.  task->tk_rqstp is dereferenced without a
+ * NULL check, so callers must only fire this with a request attached.
+ */
+TRACE_EVENT(rpcgss_seqno,
+       TP_PROTO(
+               const struct rpc_task *task
+       ),
+
+       TP_ARGS(task),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, xid)
+               __field(u32, seqno)
+       ),
+
+       TP_fast_assign(
+               const struct rpc_rqst *rqst = task->tk_rqstp;
+
+               __entry->task_id = task->tk_pid;
+               __entry->client_id = task->tk_client->cl_clid;
+               __entry->xid = be32_to_cpu(rqst->rq_xid);
+               __entry->seqno = rqst->rq_seqno;
+       ),
+
+       TP_printk("task:%u@%u xid=0x%08x seqno=%u",
+               __entry->task_id, __entry->client_id,
+               __entry->xid, __entry->seqno)
+);
+
+/*
+ * Records the request's XID and rq_seqno together with the caller-supplied
+ * seq_xmit and the boolean verdict @ret.  The verdict is printed as
+ * "reencode needed" when @ret is true and "reencode unneeded" otherwise.
+ */
+TRACE_EVENT(rpcgss_need_reencode,
+       TP_PROTO(
+               const struct rpc_task *task,
+               u32 seq_xmit,
+               bool ret
+       ),
+
+       TP_ARGS(task, seq_xmit, ret),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, xid)
+               __field(u32, seq_xmit)
+               __field(u32, seqno)
+               __field(bool, ret)
+       ),
+
+       TP_fast_assign(
+               __entry->task_id = task->tk_pid;
+               __entry->client_id = task->tk_client->cl_clid;
+               __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
+               __entry->seq_xmit = seq_xmit;
+               __entry->seqno = task->tk_rqstp->rq_seqno;
+               __entry->ret = ret;
+       ),
+
+       TP_printk("task:%u@%u xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded",
+               __entry->task_id, __entry->client_id,
+               __entry->xid, __entry->seqno, __entry->seq_xmit,
+               __entry->ret ? "" : "un")
+);
+
+/**
+ ** gssd upcall related trace events
+ **/
+
+/*
+ * Copies the string @buf into the event and prints it quoted.  @buf is
+ * stored with __string()/__assign_str(), so it must be NUL-terminated.
+ */
+TRACE_EVENT(rpcgss_upcall_msg,
+       TP_PROTO(
+               const char *buf
+       ),
+
+       TP_ARGS(buf),
+
+       TP_STRUCT__entry(
+               __string(msg, buf)
+       ),
+
+       TP_fast_assign(
+               __assign_str(msg, buf)
+       ),
+
+       TP_printk("msg='%s'", __get_str(msg))
+);
+
+/* Records a numeric uid and an integer result, both supplied by the caller. */
+TRACE_EVENT(rpcgss_upcall_result,
+       TP_PROTO(
+               u32 uid,
+               int result
+       ),
+
+       TP_ARGS(uid, result),
+
+       TP_STRUCT__entry(
+               __field(u32, uid)
+               __field(int, result)
+
+       ),
+
+       TP_fast_assign(
+               __entry->uid = uid;
+               __entry->result = result;
+       ),
+
+       TP_printk("for uid %u, result=%d", __entry->uid, __entry->result)
+);
+
+/*
+ * Records the expiry, now and timeout values passed in, plus the acceptor
+ * name.
+ *
+ * @data is a counted byte buffer of @len bytes and is not required to be
+ * NUL-terminated.  The acceptor field is therefore sized from @len (plus
+ * one byte for a terminator) instead of from strlen(@data), and exactly
+ * @len bytes are copied.
+ */
+TRACE_EVENT(rpcgss_context,
+       TP_PROTO(
+               unsigned long expiry,
+               unsigned long now,
+               unsigned int timeout,
+               unsigned int len,
+               const u8 *data
+       ),
+
+       TP_ARGS(expiry, now, timeout, len, data),
+
+       TP_STRUCT__entry(
+               __field(unsigned long, expiry)
+               __field(unsigned long, now)
+               __field(unsigned int, timeout)
+               __field(int, len)
+               __dynamic_array(char, acceptor, len + 1)
+       ),
+
+       TP_fast_assign(
+               __entry->expiry = expiry;
+               __entry->now = now;
+               __entry->timeout = timeout;
+               __entry->len = len;
+               memcpy(__get_str(acceptor), data, len);
+               __get_str(acceptor)[len] = '\0';
+       ),
+
+       TP_printk("gc_expiry=%lu now=%lu timeout=%u acceptor=%.*s",
+               __entry->expiry, __entry->now, __entry->timeout,
+               __entry->len, __get_str(acceptor))
+);
+
+
+/**
+ ** Miscellaneous events
+ **/
+
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5);
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5I);
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5P);
+
+/*
+ * Map a pseudoflavor value to its name with __print_symbolic(); only the
+ * three Kerberos pseudoflavors listed here have names.
+ */
+#define show_pseudoflavor(x)                                           \
+       __print_symbolic(x,                                             \
+               { RPC_AUTH_GSS_KRB5, "RPC_AUTH_GSS_KRB5" },             \
+               { RPC_AUTH_GSS_KRB5I, "RPC_AUTH_GSS_KRB5I" },           \
+               { RPC_AUTH_GSS_KRB5P, "RPC_AUTH_GSS_KRB5P" })
+
+
+/*
+ * Records a flavor and an error code.  The flavor is printed through
+ * show_pseudoflavor().
+ */
+TRACE_EVENT(rpcgss_createauth,
+       TP_PROTO(
+               unsigned int flavor,
+               int error
+       ),
+
+       TP_ARGS(flavor, error),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, flavor)
+               __field(int, error)
+
+       ),
+
+       TP_fast_assign(
+               __entry->flavor = flavor;
+               __entry->error = error;
+       ),
+
+       TP_printk("flavor=%s error=%d",
+               show_pseudoflavor(__entry->flavor), __entry->error)
+);
+
+
+#endif /* _TRACE_RPCGSS_H */
+
+#include <trace/define_trace.h>
index 399b1aedc927a2786a429d74f044be7fe7af2c6f..962975b4313fa58c0e3d54c3232a7d3d88a2a4b7 100644 (file)
@@ -521,12 +521,18 @@ TRACE_EVENT(xprtrdma_post_send,
 
        TP_STRUCT__entry(
                __field(const void *, req)
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
                __field(int, num_sge)
                __field(int, signaled)
                __field(int, status)
        ),
 
        TP_fast_assign(
+               const struct rpc_rqst *rqst = &req->rl_slot;
+
+               __entry->task_id = rqst->rq_task->tk_pid;
+               __entry->client_id = rqst->rq_task->tk_client->cl_clid;
                __entry->req = req;
                __entry->num_sge = req->rl_sendctx->sc_wr.num_sge;
                __entry->signaled = req->rl_sendctx->sc_wr.send_flags &
@@ -534,9 +540,11 @@ TRACE_EVENT(xprtrdma_post_send,
                __entry->status = status;
        ),
 
-       TP_printk("req=%p, %d SGEs%s, status=%d",
+       TP_printk("task:%u@%u req=%p (%d SGE%s) %sstatus=%d",
+               __entry->task_id, __entry->client_id,
                __entry->req, __entry->num_sge,
-               (__entry->signaled ? ", signaled" : ""),
+               (__entry->num_sge == 1 ? "" : "s"),
+               (__entry->signaled ? "signaled " : ""),
                __entry->status
        )
 );
index 0d5d0d91f861759762214bea030016c6668ce265..8451f30c6a0fe8fa216c5f57a1ae81e8bded2e91 100644 (file)
@@ -77,6 +77,50 @@ TRACE_EVENT(rpc_request,
                )
 );
 
+TRACE_DEFINE_ENUM(RPC_TASK_ASYNC);
+TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER);
+TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN);
+TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS);
+TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC);
+TRACE_DEFINE_ENUM(RPC_TASK_KILLED);
+TRACE_DEFINE_ENUM(RPC_TASK_SOFT);
+TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN);
+TRACE_DEFINE_ENUM(RPC_TASK_SENT);
+TRACE_DEFINE_ENUM(RPC_TASK_TIMEOUT);
+TRACE_DEFINE_ENUM(RPC_TASK_NOCONNECT);
+TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT);
+
+/*
+ * Render a flags word as a "|"-separated list of short names.  Each
+ * constant below is passed to __print_flags() as-is, i.e. as a bit mask.
+ */
+#define rpc_show_task_flags(flags)                                     \
+       __print_flags(flags, "|",                                       \
+               { RPC_TASK_ASYNC, "ASYNC" },                            \
+               { RPC_TASK_SWAPPER, "SWAPPER" },                        \
+               { RPC_CALL_MAJORSEEN, "MAJORSEEN" },                    \
+               { RPC_TASK_ROOTCREDS, "ROOTCREDS" },                    \
+               { RPC_TASK_DYNAMIC, "DYNAMIC" },                        \
+               { RPC_TASK_KILLED, "KILLED" },                          \
+               { RPC_TASK_SOFT, "SOFT" },                              \
+               { RPC_TASK_SOFTCONN, "SOFTCONN" },                      \
+               { RPC_TASK_SENT, "SENT" },                              \
+               { RPC_TASK_TIMEOUT, "TIMEOUT" },                        \
+               { RPC_TASK_NOCONNECT, "NOCONNECT" },                    \
+               { RPC_TASK_NO_RETRANS_TIMEOUT, "NORTO" })
+
+TRACE_DEFINE_ENUM(RPC_TASK_RUNNING);
+TRACE_DEFINE_ENUM(RPC_TASK_QUEUED);
+TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE);
+TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT);
+TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV);
+TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
+
+/*
+ * Render a runstate word as a "|"-separated list of short names.  Unlike
+ * rpc_show_task_flags(), the constants here are used as bit numbers: each
+ * one is shifted into a mask (1UL << n) before being matched.
+ */
+#define rpc_show_runstate(flags)                                       \
+       __print_flags(flags, "|",                                       \
+               { (1UL << RPC_TASK_RUNNING), "RUNNING" },               \
+               { (1UL << RPC_TASK_QUEUED), "QUEUED" },                 \
+               { (1UL << RPC_TASK_ACTIVE), "ACTIVE" },                 \
+               { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" },           \
+               { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" },           \
+               { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" })
+
 DECLARE_EVENT_CLASS(rpc_task_running,
 
        TP_PROTO(const struct rpc_task *task, const void *action),
@@ -102,10 +146,10 @@ DECLARE_EVENT_CLASS(rpc_task_running,
                __entry->flags = task->tk_flags;
                ),
 
-       TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d action=%pf",
+       TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%pf",
                __entry->task_id, __entry->client_id,
-               __entry->flags,
-               __entry->runstate,
+               rpc_show_task_flags(__entry->flags),
+               rpc_show_runstate(__entry->runstate),
                __entry->status,
                __entry->action
                )
@@ -149,10 +193,10 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
                __assign_str(q_name, rpc_qname(q));
                ),
 
-       TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
+       TP_printk("task:%u@%d flags=%s runstate=%s status=%d timeout=%lu queue=%s",
                __entry->task_id, __entry->client_id,
-               __entry->flags,
-               __entry->runstate,
+               rpc_show_task_flags(__entry->flags),
+               rpc_show_runstate(__entry->runstate),
                __entry->status,
                __entry->timeout,
                __get_str(q_name)
@@ -169,6 +213,87 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
 DEFINE_RPC_QUEUED_EVENT(sleep);
 DEFINE_RPC_QUEUED_EVENT(wakeup);
 
+/*
+ * rpc_failure - event class that records only which RPC task is involved:
+ * the task's tk_pid and its client's cl_clid, printed as "task:<pid>@<clid>".
+ */
+DECLARE_EVENT_CLASS(rpc_failure,
+
+       TP_PROTO(const struct rpc_task *task),
+
+       TP_ARGS(task),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+       ),
+
+       TP_fast_assign(
+               __entry->task_id = task->tk_pid;
+               __entry->client_id = task->tk_client->cl_clid;
+       ),
+
+       TP_printk("task:%u@%u",
+               __entry->task_id, __entry->client_id)
+);
+
+#define DEFINE_RPC_FAILURE(name)                                       \
+       DEFINE_EVENT(rpc_failure, rpc_bad_##name,                       \
+                       TP_PROTO(                                       \
+                               const struct rpc_task *task             \
+                       ),                                              \
+                       TP_ARGS(task))
+
+DEFINE_RPC_FAILURE(callhdr);
+DEFINE_RPC_FAILURE(verifier);
+
+/*
+ * rpc_reply_event - event class that identifies an RPC task by task id,
+ * client id, server name, XID (converted to CPU byte order), program name
+ * and version, and procedure name.
+ */
+DECLARE_EVENT_CLASS(rpc_reply_event,
+
+       TP_PROTO(
+               const struct rpc_task *task
+       ),
+
+       TP_ARGS(task),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, xid)
+               __string(progname, task->tk_client->cl_program->name)
+               __field(u32, version)
+               __string(procname, rpc_proc_name(task))
+               __string(servername, task->tk_xprt->servername)
+       ),
+
+       TP_fast_assign(
+               __entry->task_id = task->tk_pid;
+               __entry->client_id = task->tk_client->cl_clid;
+               __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
+               __assign_str(progname, task->tk_client->cl_program->name);
+               __entry->version = task->tk_client->cl_vers;
+               __assign_str(procname, rpc_proc_name(task));
+               __assign_str(servername, task->tk_xprt->servername);
+       ),
+
+       /* client_id and version are unsigned fields, so print them with %u */
+       TP_printk("task:%u@%u server=%s xid=0x%08x %sv%u %s",
+               __entry->task_id, __entry->client_id, __get_str(servername),
+               __entry->xid, __get_str(progname), __entry->version,
+               __get_str(procname))
+);
+
+#define DEFINE_RPC_REPLY_EVENT(name)                                   \
+       DEFINE_EVENT(rpc_reply_event, rpc__##name,                      \
+                       TP_PROTO(                                       \
+                               const struct rpc_task *task             \
+                       ),                                              \
+                       TP_ARGS(task))
+
+DEFINE_RPC_REPLY_EVENT(prog_unavail);
+DEFINE_RPC_REPLY_EVENT(prog_mismatch);
+DEFINE_RPC_REPLY_EVENT(proc_unavail);
+DEFINE_RPC_REPLY_EVENT(garbage_args);
+DEFINE_RPC_REPLY_EVENT(unparsable);
+DEFINE_RPC_REPLY_EVENT(mismatch);
+DEFINE_RPC_REPLY_EVENT(stale_creds);
+DEFINE_RPC_REPLY_EVENT(bad_creds);
+DEFINE_RPC_REPLY_EVENT(auth_tooweak);
+
 TRACE_EVENT(rpc_stats_latency,
 
        TP_PROTO(
@@ -210,6 +335,169 @@ TRACE_EVENT(rpc_stats_latency,
                __entry->backlog, __entry->rtt, __entry->execute)
 );
 
+/*
+ * rpc_xdr_overflow - snapshot of an xdr_stream, its xdr_buf geometry and
+ * the size that was requested.
+ *
+ * xdr->rqst may be NULL: the assignment code below substitutes zeroes and
+ * "unknown" in that case. The __string() declarations evaluate their source
+ * expression too (to size the string), so they must apply the same guard.
+ */
+TRACE_EVENT(rpc_xdr_overflow,
+       TP_PROTO(
+               const struct xdr_stream *xdr,
+               size_t requested
+       ),
+
+       TP_ARGS(xdr, requested),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(int, version)
+               __field(size_t, requested)
+               __field(const void *, end)
+               __field(const void *, p)
+               __field(const void *, head_base)
+               __field(size_t, head_len)
+               __field(const void *, tail_base)
+               __field(size_t, tail_len)
+               __field(unsigned int, page_len)
+               __field(unsigned int, len)
+               __string(progname, xdr->rqst ?
+                        xdr->rqst->rq_task->tk_client->cl_program->name : "unknown")
+               __string(procedure, xdr->rqst ?
+                        xdr->rqst->rq_task->tk_msg.rpc_proc->p_name : "unknown")
+       ),
+
+       TP_fast_assign(
+               if (xdr->rqst) {
+                       const struct rpc_task *task = xdr->rqst->rq_task;
+
+                       __entry->task_id = task->tk_pid;
+                       __entry->client_id = task->tk_client->cl_clid;
+                       __assign_str(progname,
+                                    task->tk_client->cl_program->name);
+                       __entry->version = task->tk_client->cl_vers;
+                       __assign_str(procedure, task->tk_msg.rpc_proc->p_name);
+               } else {
+                       /* No request attached to this stream */
+                       __entry->task_id = 0;
+                       __entry->client_id = 0;
+                       __assign_str(progname, "unknown");
+                       __entry->version = 0;
+                       __assign_str(procedure, "unknown");
+               }
+               __entry->requested = requested;
+               __entry->end = xdr->end;
+               __entry->p = xdr->p;
+               __entry->head_base = xdr->buf->head[0].iov_base;
+               __entry->head_len = xdr->buf->head[0].iov_len;
+               __entry->page_len = xdr->buf->page_len;
+               __entry->tail_base = xdr->buf->tail[0].iov_base;
+               __entry->tail_len = xdr->buf->tail[0].iov_len;
+               __entry->len = xdr->buf->len;
+       ),
+
+       /* No trailing newline: the tracing core terminates each record */
+       TP_printk(
+               "task:%u@%u %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u",
+               __entry->task_id, __entry->client_id,
+               __get_str(progname), __entry->version, __get_str(procedure),
+               __entry->requested, __entry->p, __entry->end,
+               __entry->head_base, __entry->head_len,
+               __entry->page_len,
+               __entry->tail_base, __entry->tail_len,
+               __entry->len
+       )
+);
+
+/*
+ * rpc_xdr_alignment - records the caller-supplied offset and copied count
+ * together with the xdr_buf geometry (head, pages, tail, total length) and
+ * the identity of the owning RPC task.
+ *
+ * NOTE(review): unlike rpc_xdr_overflow, this event dereferences xdr->rqst
+ * unconditionally -- confirm that every caller passes a stream with a
+ * request attached.
+ */
+TRACE_EVENT(rpc_xdr_alignment,
+       TP_PROTO(
+               const struct xdr_stream *xdr,
+               size_t offset,
+               unsigned int copied
+       ),
+
+       TP_ARGS(xdr, offset, copied),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(int, version)
+               __field(size_t, offset)
+               __field(unsigned int, copied)
+               __field(const void *, head_base)
+               __field(size_t, head_len)
+               __field(const void *, tail_base)
+               __field(size_t, tail_len)
+               __field(unsigned int, page_len)
+               __field(unsigned int, len)
+               __string(progname,
+                        xdr->rqst->rq_task->tk_client->cl_program->name)
+               __string(procedure,
+                        xdr->rqst->rq_task->tk_msg.rpc_proc->p_name)
+       ),
+
+       TP_fast_assign(
+               const struct rpc_task *task = xdr->rqst->rq_task;
+
+               __entry->task_id = task->tk_pid;
+               __entry->client_id = task->tk_client->cl_clid;
+               __assign_str(progname,
+                            task->tk_client->cl_program->name);
+               __entry->version = task->tk_client->cl_vers;
+               __assign_str(procedure, task->tk_msg.rpc_proc->p_name);
+
+               __entry->offset = offset;
+               __entry->copied = copied;
+               __entry->head_base = xdr->buf->head[0].iov_base;
+               __entry->head_len = xdr->buf->head[0].iov_len;
+               __entry->page_len = xdr->buf->page_len;
+               __entry->tail_base = xdr->buf->tail[0].iov_base;
+               __entry->tail_len = xdr->buf->tail[0].iov_len;
+               __entry->len = xdr->buf->len;
+       ),
+
+       /* No trailing newline: the tracing core terminates each record */
+       TP_printk(
+               "task:%u@%u %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u",
+               __entry->task_id, __entry->client_id,
+               __get_str(progname), __entry->version, __get_str(procedure),
+               __entry->offset, __entry->copied,
+               __entry->head_base, __entry->head_len,
+               __entry->page_len,
+               __entry->tail_base, __entry->tail_len,
+               __entry->len
+       )
+);
+
+/*
+ * rpc_reply_pages - records the layout of a request's receive buffer
+ * (rq_rcv_buf): head base/length, page length and tail base/length, tagged
+ * with the owning task and client ids.
+ */
+TRACE_EVENT(rpc_reply_pages,
+       TP_PROTO(
+               const struct rpc_rqst *req
+       ),
+
+       TP_ARGS(req),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(const void *, head_base)
+               __field(size_t, head_len)
+               __field(const void *, tail_base)
+               __field(size_t, tail_len)
+               __field(unsigned int, page_len)
+       ),
+
+       TP_fast_assign(
+               __entry->task_id = req->rq_task->tk_pid;
+               __entry->client_id = req->rq_task->tk_client->cl_clid;
+
+               __entry->head_base = req->rq_rcv_buf.head[0].iov_base;
+               __entry->head_len = req->rq_rcv_buf.head[0].iov_len;
+               __entry->page_len = req->rq_rcv_buf.page_len;
+               __entry->tail_base = req->rq_rcv_buf.tail[0].iov_base;
+               __entry->tail_len = req->rq_rcv_buf.tail[0].iov_len;
+       ),
+
+       /* No trailing newline: the tracing core terminates each record */
+       TP_printk(
+               "task:%u@%u xdr=[%p,%zu]/%u/[%p,%zu]",
+               __entry->task_id, __entry->client_id,
+               __entry->head_base, __entry->head_len,
+               __entry->page_len,
+               __entry->tail_base, __entry->tail_len
+       )
+);
+
 /*
  * First define the enums in the below macros to be exported to userspace
  * via TRACE_DEFINE_ENUM().
@@ -404,9 +692,68 @@ DECLARE_EVENT_CLASS(rpc_xprt_event,
 
 DEFINE_RPC_XPRT_EVENT(timer);
 DEFINE_RPC_XPRT_EVENT(lookup_rqst);
-DEFINE_RPC_XPRT_EVENT(transmit);
 DEFINE_RPC_XPRT_EVENT(complete_rqst);
 
+/*
+ * xprt_transmit - records, for one rpc_rqst, the owning task and client
+ * ids, the XID (converted from big-endian to CPU byte order), rq_seqno and
+ * the status value supplied by the caller.
+ */
+TRACE_EVENT(xprt_transmit,
+       TP_PROTO(
+               const struct rpc_rqst *rqst,
+               int status
+       ),
+
+       TP_ARGS(rqst, status),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, xid)
+               __field(u32, seqno)
+               __field(int, status)
+       ),
+
+       TP_fast_assign(
+               __entry->task_id = rqst->rq_task->tk_pid;
+               __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+               __entry->xid = be32_to_cpu(rqst->rq_xid);
+               __entry->seqno = rqst->rq_seqno;
+               __entry->status = status;
+       ),
+
+       TP_printk(
+               "task:%u@%u xid=0x%08x seqno=%u status=%d",
+               __entry->task_id, __entry->client_id, __entry->xid,
+               __entry->seqno, __entry->status)
+);
+
+/*
+ * xprt_enq_xmit - records the task and client ids plus the XID (CPU byte
+ * order) and rq_seqno of the task's request (task->tk_rqstp), along with a
+ * caller-supplied integer "stage" whose meaning is defined by the caller.
+ */
+TRACE_EVENT(xprt_enq_xmit,
+       TP_PROTO(
+               const struct rpc_task *task,
+               int stage
+       ),
+
+       TP_ARGS(task, stage),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, xid)
+               __field(u32, seqno)
+               __field(int, stage)
+       ),
+
+       TP_fast_assign(
+               __entry->task_id = task->tk_pid;
+               __entry->client_id = task->tk_client->cl_clid;
+               __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
+               __entry->seqno = task->tk_rqstp->rq_seqno;
+               __entry->stage = stage;
+       ),
+
+       TP_printk(
+               "task:%u@%u xid=0x%08x seqno=%u stage=%d",
+               __entry->task_id, __entry->client_id, __entry->xid,
+               __entry->seqno, __entry->stage)
+);
+
 TRACE_EVENT(xprt_ping,
        TP_PROTO(const struct rpc_xprt *xprt, int status),
 
index ac09ca8032965bfd4280fb3f6d3410c08cbda243..83f5617bae07eda1cdada10022c541d0b6d2d239 100644 (file)
@@ -34,6 +34,22 @@ config RPCSEC_GSS_KRB5
 
          If unsure, say Y.
 
+# Kconfig symbols are written without the CONFIG_ prefix; the build system
+# adds it when generating the macros that C code tests.
+config SUNRPC_DISABLE_INSECURE_ENCTYPES
+       bool "Secure RPC: Disable insecure Kerberos encryption types"
+       depends on RPCSEC_GSS_KRB5
+       default n
+       help
+         Choose Y here to disable the use of deprecated encryption types
+         with the Kerberos version 5 GSS-API mechanism (RFC 1964). The
+         deprecated encryption types include DES-CBC-MD5, DES-CBC-CRC,
+         and DES-CBC-MD4. These types were deprecated by RFC 6649 because
+         they were found to be insecure.
+
+         N is the default because many sites have deployed KDCs and
+         keytabs that contain only these deprecated encryption types.
+         Choosing Y prevents the use of known-insecure encryption types
+         but might result in compatibility problems.
+
 config SUNRPC_DEBUG
        bool "RPC: Enable dprintk debugging"
        depends on SUNRPC && SYSCTL
index f3023bbc0b7fcc292793fda4469e0c914b6ece93..e7861026b9e52ac07d0e12971d10323be69f7870 100644 (file)
@@ -17,9 +17,7 @@
 #include <linux/sunrpc/gss_api.h>
 #include <linux/spinlock.h>
 
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY       RPCDBG_AUTH
-#endif
+#include <trace/events/sunrpc.h>
 
 #define RPC_CREDCACHE_DEFAULT_HASHBITS (4)
 struct rpc_cred_cache {
@@ -267,8 +265,6 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size)
                }
        }
        rcu_read_unlock();
-
-       dprintk("RPC:       %s returns %d\n", __func__, result);
        return result;
 }
 EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
@@ -636,9 +632,6 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
        struct rpc_cred *ret;
        const struct cred *cred = current_cred();
 
-       dprintk("RPC:       looking up %s cred\n",
-               auth->au_ops->au_name);
-
        memset(&acred, 0, sizeof(acred));
        acred.cred = cred;
        ret = auth->au_ops->lookup_cred(auth, &acred, flags);
@@ -670,8 +663,6 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
        };
        struct rpc_cred *ret;
 
-       dprintk("RPC: %5u looking up %s cred\n",
-               task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
        ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
        put_cred(acred.cred);
        return ret;
@@ -688,8 +679,6 @@ rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags)
 
        if (!acred.principal)
                return NULL;
-       dprintk("RPC: %5u looking up %s machine cred\n",
-               task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
        return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
 }
 
@@ -698,8 +687,6 @@ rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
 {
        struct rpc_auth *auth = task->tk_client->cl_auth;
 
-       dprintk("RPC: %5u looking up %s cred\n",
-               task->tk_pid, auth->au_ops->au_name);
        return rpcauth_lookupcred(auth, lookupflags);
 }
 
@@ -771,75 +758,102 @@ destroy:
 }
 EXPORT_SYMBOL_GPL(put_rpccred);
 
-__be32 *
-rpcauth_marshcred(struct rpc_task *task, __be32 *p)
+/**
+ * rpcauth_marshcred - Append RPC credential to end of @xdr
+ * @task: controlling RPC task
+ * @xdr: xdr_stream containing initial portion of RPC Call header
+ *
+ * On success, an appropriate verifier is added to @xdr, @xdr is
+ * updated to point past the verifier, and zero is returned.
+ * Otherwise, @xdr is in an undefined state and a negative errno
+ * is returned.
+ */
+int rpcauth_marshcred(struct rpc_task *task, struct xdr_stream *xdr)
 {
-       struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+       const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
 
-       dprintk("RPC: %5u marshaling %s cred %p\n",
-               task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
-
-       return cred->cr_ops->crmarshal(task, p);
+       return ops->crmarshal(task, xdr);
 }
 
-__be32 *
-rpcauth_checkverf(struct rpc_task *task, __be32 *p)
+/**
+ * rpcauth_wrap_req_encode - XDR encode the RPC procedure
+ * @task: controlling RPC task
+ * @xdr: stream where on-the-wire bytes are to be marshalled
+ *
+ * On success, @xdr contains the encoded and wrapped message.
+ * Otherwise, @xdr is in an undefined state.
+ */
+int rpcauth_wrap_req_encode(struct rpc_task *task, struct xdr_stream *xdr)
 {
-       struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+       kxdreproc_t encode = task->tk_msg.rpc_proc->p_encode;
 
-       dprintk("RPC: %5u validating %s cred %p\n",
-               task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
-
-       return cred->cr_ops->crvalidate(task, p);
+       encode(task->tk_rqstp, xdr, task->tk_msg.rpc_argp);
+       return 0;
 }
+EXPORT_SYMBOL_GPL(rpcauth_wrap_req_encode);
 
-static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
-                                  __be32 *data, void *obj)
+/**
+ * rpcauth_wrap_req - XDR encode and wrap the RPC procedure
+ * @task: controlling RPC task
+ * @xdr: stream where on-the-wire bytes are to be marshalled
+ *
+ * On success, @xdr contains the encoded and wrapped message,
+ * and zero is returned. Otherwise, @xdr is in an undefined
+ * state and a negative errno is returned.
+ */
+int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
 {
-       struct xdr_stream xdr;
+       const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
 
-       xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
-       encode(rqstp, &xdr, obj);
+       return ops->crwrap_req(task, xdr);
 }
 
+/**
+ * rpcauth_checkverf - Validate verifier in RPC Reply header
+ * @task: controlling RPC task
+ * @xdr: xdr_stream containing RPC Reply header
+ *
+ * On success, @xdr is updated to point past the verifier and
+ * zero is returned. Otherwise, @xdr is in an undefined state
+ * and a negative errno is returned.
+ */
 int
-rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
-               __be32 *data, void *obj)
+rpcauth_checkverf(struct rpc_task *task, struct xdr_stream *xdr)
 {
-       struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+       const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
 
-       dprintk("RPC: %5u using %s cred %p to wrap rpc data\n",
-                       task->tk_pid, cred->cr_ops->cr_name, cred);
-       if (cred->cr_ops->crwrap_req)
-               return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
-       /* By default, we encode the arguments normally. */
-       rpcauth_wrap_req_encode(encode, rqstp, data, obj);
-       return 0;
+       return ops->crvalidate(task, xdr);
 }
 
-static int
-rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
-                         __be32 *data, void *obj)
+/**
+ * rpcauth_unwrap_resp_decode - Invoke XDR decode function
+ * @task: controlling RPC task
+ * @xdr: stream where the Reply message resides
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
+int
+rpcauth_unwrap_resp_decode(struct rpc_task *task, struct xdr_stream *xdr)
 {
-       struct xdr_stream xdr;
+       kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
 
-       xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
-       return decode(rqstp, &xdr, obj);
+       return decode(task->tk_rqstp, xdr, task->tk_msg.rpc_resp);
 }
+EXPORT_SYMBOL_GPL(rpcauth_unwrap_resp_decode);
 
+/**
+ * rpcauth_unwrap_resp - Invoke unwrap and decode function for the cred
+ * @task: controlling RPC task
+ * @xdr: stream where the Reply message resides
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
 int
-rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
-               __be32 *data, void *obj)
+rpcauth_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
 {
-       struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+       const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
 
-       dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n",
-                       task->tk_pid, cred->cr_ops->cr_name, cred);
-       if (cred->cr_ops->crunwrap_resp)
-               return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
-                                                  data, obj);
-       /* By default, we decode the arguments normally. */
-       return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
+       return ops->crunwrap_resp(task, xdr);
 }
 
 bool
@@ -865,8 +879,6 @@ rpcauth_refreshcred(struct rpc_task *task)
                        goto out;
                cred = task->tk_rqstp->rq_cred;
        }
-       dprintk("RPC: %5u refreshing %s cred %p\n",
-               task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
 
        err = cred->cr_ops->crrefresh(task);
 out:
@@ -880,8 +892,6 @@ rpcauth_invalcred(struct rpc_task *task)
 {
        struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 
-       dprintk("RPC: %5u invalidating %s cred %p\n",
-               task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
        if (cred)
                clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 }
index c374268b008ff261081415c084c153e293cafda5..4a29f4c5dac4bc4aaceb13cab7272f55881843d3 100644 (file)
@@ -7,7 +7,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
 
 auth_rpcgss-y := auth_gss.o gss_generic_token.o \
        gss_mech_switch.o svcauth_gss.o \
-       gss_rpc_upcall.o gss_rpc_xdr.o
+       gss_rpc_upcall.o gss_rpc_xdr.o trace.o
 
 obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
 
index 1531b0219344d9e47a8dc33509355fefa9e05f5c..3fd56c0c90ae67e589e6af06bf10fa53637df882 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
 /*
  * linux/net/sunrpc/auth_gss/auth_gss.c
  *
@@ -8,34 +9,8 @@
  *
  *  Dug Song       <dugsong@monkey.org>
  *  Andy Adamson   <andros@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -55,6 +30,8 @@
 
 #include "../netns.h"
 
+#include <trace/events/rpcgss.h>
+
 static const struct rpc_authops authgss_ops;
 
 static const struct rpc_credops gss_credops;
@@ -260,6 +237,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
        }
        ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
        if (ret < 0) {
+               trace_rpcgss_import_ctx(ret);
                p = ERR_PTR(ret);
                goto err;
        }
@@ -275,12 +253,9 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
        if (IS_ERR(p))
                goto err;
 done:
-       dprintk("RPC:       %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n",
-               __func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len,
-               ctx->gc_acceptor.data);
-       return p;
+       trace_rpcgss_context(ctx->gc_expiry, now, timeout,
+                            ctx->gc_acceptor.len, ctx->gc_acceptor.data);
 err:
-       dprintk("RPC:       %s returns error %ld\n", __func__, -PTR_ERR(p));
        return p;
 }
 
@@ -354,10 +329,8 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
                if (auth && pos->auth->service != auth->service)
                        continue;
                refcount_inc(&pos->count);
-               dprintk("RPC:       %s found msg %p\n", __func__, pos);
                return pos;
        }
-       dprintk("RPC:       %s found nothing\n", __func__);
        return NULL;
 }
 
@@ -456,7 +429,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
        size_t buflen = sizeof(gss_msg->databuf);
        int len;
 
-       len = scnprintf(p, buflen, "mech=%s uid=%d ", mech->gm_name,
+       len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name,
                        from_kuid(&init_user_ns, gss_msg->uid));
        buflen -= len;
        p += len;
@@ -467,7 +440,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
         * identity that we are authenticating to.
         */
        if (target_name) {
-               len = scnprintf(p, buflen, "target=%s ", target_name);
+               len = scnprintf(p, buflen, " target=%s", target_name);
                buflen -= len;
                p += len;
                gss_msg->msg.len += len;
@@ -487,11 +460,11 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
                char *c = strchr(service_name, '@');
 
                if (!c)
-                       len = scnprintf(p, buflen, "service=%s ",
+                       len = scnprintf(p, buflen, " service=%s",
                                        service_name);
                else
                        len = scnprintf(p, buflen,
-                                       "service=%.*s srchost=%s ",
+                                       " service=%.*s srchost=%s",
                                        (int)(c - service_name),
                                        service_name, c + 1);
                buflen -= len;
@@ -500,17 +473,17 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
        }
 
        if (mech->gm_upcall_enctypes) {
-               len = scnprintf(p, buflen, "enctypes=%s ",
+               len = scnprintf(p, buflen, " enctypes=%s",
                                mech->gm_upcall_enctypes);
                buflen -= len;
                p += len;
                gss_msg->msg.len += len;
        }
+       trace_rpcgss_upcall_msg(gss_msg->databuf);
        len = scnprintf(p, buflen, "\n");
        if (len == 0)
                goto out_overflow;
        gss_msg->msg.len += len;
-
        gss_msg->msg.data = gss_msg->databuf;
        return 0;
 out_overflow:
@@ -603,8 +576,6 @@ gss_refresh_upcall(struct rpc_task *task)
        struct rpc_pipe *pipe;
        int err = 0;
 
-       dprintk("RPC: %5u %s for uid %u\n",
-               task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
        gss_msg = gss_setup_upcall(gss_auth, cred);
        if (PTR_ERR(gss_msg) == -EAGAIN) {
                /* XXX: warning on the first, under the assumption we
@@ -612,7 +583,8 @@ gss_refresh_upcall(struct rpc_task *task)
                warn_gssd();
                task->tk_timeout = 15*HZ;
                rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
-               return -EAGAIN;
+               err = -EAGAIN;
+               goto out;
        }
        if (IS_ERR(gss_msg)) {
                err = PTR_ERR(gss_msg);
@@ -635,9 +607,8 @@ gss_refresh_upcall(struct rpc_task *task)
        spin_unlock(&pipe->lock);
        gss_release_msg(gss_msg);
 out:
-       dprintk("RPC: %5u %s for uid %u result %d\n",
-               task->tk_pid, __func__,
-               from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
+       trace_rpcgss_upcall_result(from_kuid(&init_user_ns,
+                                            cred->cr_cred->fsuid), err);
        return err;
 }
 
@@ -652,14 +623,13 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
        DEFINE_WAIT(wait);
        int err;
 
-       dprintk("RPC:       %s for uid %u\n",
-               __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
 retry:
        err = 0;
        /* if gssd is down, just skip upcalling altogether */
        if (!gssd_running(net)) {
                warn_gssd();
-               return -EACCES;
+               err = -EACCES;
+               goto out;
        }
        gss_msg = gss_setup_upcall(gss_auth, cred);
        if (PTR_ERR(gss_msg) == -EAGAIN) {
@@ -700,8 +670,8 @@ out_intr:
        finish_wait(&gss_msg->waitqueue, &wait);
        gss_release_msg(gss_msg);
 out:
-       dprintk("RPC:       %s for uid %u result %d\n",
-               __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
+       trace_rpcgss_upcall_result(from_kuid(&init_user_ns,
+                                            cred->cr_cred->fsuid), err);
        return err;
 }
 
@@ -794,7 +764,6 @@ err_put_ctx:
 err:
        kfree(buf);
 out:
-       dprintk("RPC:       %s returning %zd\n", __func__, err);
        return err;
 }
 
@@ -863,8 +832,6 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
        struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);
 
        if (msg->errno < 0) {
-               dprintk("RPC:       %s releasing msg %p\n",
-                       __func__, gss_msg);
                refcount_inc(&gss_msg->count);
                gss_unhash_msg(gss_msg);
                if (msg->errno == -ETIMEDOUT)
@@ -1024,8 +991,6 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
        struct rpc_auth * auth;
        int err = -ENOMEM; /* XXX? */
 
-       dprintk("RPC:       creating GSS authenticator for client %p\n", clnt);
-
        if (!try_module_get(THIS_MODULE))
                return ERR_PTR(err);
        if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
@@ -1041,10 +1006,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
        gss_auth->net = get_net(rpc_net_ns(clnt));
        err = -EINVAL;
        gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
-       if (!gss_auth->mech) {
-               dprintk("RPC:       Pseudoflavor %d not found!\n", flavor);
+       if (!gss_auth->mech)
                goto err_put_net;
-       }
        gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
        if (gss_auth->service == 0)
                goto err_put_mech;
@@ -1053,6 +1016,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
        auth = &gss_auth->rpc_auth;
        auth->au_cslack = GSS_CRED_SLACK >> 2;
        auth->au_rslack = GSS_VERF_SLACK >> 2;
+       auth->au_verfsize = GSS_VERF_SLACK >> 2;
+       auth->au_ralign = GSS_VERF_SLACK >> 2;
        auth->au_flags = 0;
        auth->au_ops = &authgss_ops;
        auth->au_flavor = flavor;
@@ -1099,6 +1064,7 @@ err_free:
        kfree(gss_auth);
 out_dec:
        module_put(THIS_MODULE);
+       trace_rpcgss_createauth(flavor, err);
        return ERR_PTR(err);
 }
 
@@ -1135,9 +1101,6 @@ gss_destroy(struct rpc_auth *auth)
        struct gss_auth *gss_auth = container_of(auth,
                        struct gss_auth, rpc_auth);
 
-       dprintk("RPC:       destroying GSS authenticator %p flavor %d\n",
-                       auth, auth->au_flavor);
-
        if (hash_hashed(&gss_auth->hash)) {
                spin_lock(&gss_auth_hash_lock);
                hash_del(&gss_auth->hash);
@@ -1245,7 +1208,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
        struct gss_cred *new;
 
        /* Make a copy of the cred so that we can reference count it */
-       new = kzalloc(sizeof(*gss_cred), GFP_NOIO);
+       new = kzalloc(sizeof(*gss_cred), GFP_NOFS);
        if (new) {
                struct auth_cred acred = {
                        .cred = gss_cred->gc_base.cr_cred,
@@ -1300,8 +1263,6 @@ gss_send_destroy_context(struct rpc_cred *cred)
 static void
 gss_do_free_ctx(struct gss_cl_ctx *ctx)
 {
-       dprintk("RPC:       %s\n", __func__);
-
        gss_delete_sec_context(&ctx->gc_gss_ctx);
        kfree(ctx->gc_wire_ctx.data);
        kfree(ctx->gc_acceptor.data);
@@ -1324,7 +1285,6 @@ gss_free_ctx(struct gss_cl_ctx *ctx)
 static void
 gss_free_cred(struct gss_cred *gss_cred)
 {
-       dprintk("RPC:       %s cred=%p\n", __func__, gss_cred);
        kfree(gss_cred);
 }
 
@@ -1381,10 +1341,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
        struct gss_cred *cred = NULL;
        int err = -ENOMEM;
 
-       dprintk("RPC:       %s for uid %d, flavor %d\n",
-               __func__, from_kuid(&init_user_ns, acred->cred->fsuid),
-               auth->au_flavor);
-
        if (!(cred = kzalloc(sizeof(*cred), gfp)))
                goto out_err;
 
@@ -1400,7 +1356,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
        return &cred->gc_base;
 
 out_err:
-       dprintk("RPC:       %s failed with error %d\n", __func__, err);
        return ERR_PTR(err);
 }
 
@@ -1526,69 +1481,84 @@ out:
 }
 
 /*
-* Marshal credentials.
-* Maybe we should keep a cached credential for performance reasons.
-*/
-static __be32 *
-gss_marshal(struct rpc_task *task, __be32 *p)
+ * Marshal credentials.
+ *
+ * The expensive part is computing the verifier. We can't cache a
+ * pre-computed version of the verifier because the seqno, which
+ * is different every time, is included in the MIC.
+ */
+static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 {
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_cred *cred = req->rq_cred;
        struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
                                                 gc_base);
        struct gss_cl_ctx       *ctx = gss_cred_get_ctx(cred);
-       __be32          *cred_len;
+       __be32          *p, *cred_len;
        u32             maj_stat = 0;
        struct xdr_netobj mic;
        struct kvec     iov;
        struct xdr_buf  verf_buf;
+       int status;
 
-       dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+       /* Credential */
 
-       *p++ = htonl(RPC_AUTH_GSS);
+       p = xdr_reserve_space(xdr, 7 * sizeof(*p) +
+                             ctx->gc_wire_ctx.len);
+       if (!p)
+               goto marshal_failed;
+       *p++ = rpc_auth_gss;
        cred_len = p++;
 
        spin_lock(&ctx->gc_seq_lock);
        req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ;
        spin_unlock(&ctx->gc_seq_lock);
        if (req->rq_seqno == MAXSEQ)
-               goto out_expired;
+               goto expired;
+       trace_rpcgss_seqno(task);
 
-       *p++ = htonl((u32) RPC_GSS_VERSION);
-       *p++ = htonl((u32) ctx->gc_proc);
-       *p++ = htonl((u32) req->rq_seqno);
-       *p++ = htonl((u32) gss_cred->gc_service);
+       *p++ = cpu_to_be32(RPC_GSS_VERSION);
+       *p++ = cpu_to_be32(ctx->gc_proc);
+       *p++ = cpu_to_be32(req->rq_seqno);
+       *p++ = cpu_to_be32(gss_cred->gc_service);
        p = xdr_encode_netobj(p, &ctx->gc_wire_ctx);
-       *cred_len = htonl((p - (cred_len + 1)) << 2);
+       *cred_len = cpu_to_be32((p - (cred_len + 1)) << 2);
+
+       /* Verifier */
 
        /* We compute the checksum for the verifier over the xdr-encoded bytes
         * starting with the xid and ending at the end of the credential: */
-       iov.iov_base = xprt_skip_transport_header(req->rq_xprt,
-                                       req->rq_snd_buf.head[0].iov_base);
+       iov.iov_base = req->rq_snd_buf.head[0].iov_base;
        iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
        xdr_buf_from_iov(&iov, &verf_buf);
 
-       /* set verifier flavor*/
-       *p++ = htonl(RPC_AUTH_GSS);
-
+       p = xdr_reserve_space(xdr, sizeof(*p));
+       if (!p)
+               goto marshal_failed;
+       *p++ = rpc_auth_gss;
        mic.data = (u8 *)(p + 1);
        maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
-       if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
-               goto out_expired;
-       } else if (maj_stat != 0) {
-               pr_warn("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
-               task->tk_status = -EIO;
-               goto out_put_ctx;
-       }
-       p = xdr_encode_opaque(p, NULL, mic.len);
+       if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+               goto expired;
+       else if (maj_stat != 0)
+               goto bad_mic;
+       if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
+               goto marshal_failed;
+       status = 0;
+out:
        gss_put_ctx(ctx);
-       return p;
-out_expired:
+       return status;
+expired:
        clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
-       task->tk_status = -EKEYEXPIRED;
-out_put_ctx:
-       gss_put_ctx(ctx);
-       return NULL;
+       status = -EKEYEXPIRED;
+       goto out;
+marshal_failed:
+       status = -EMSGSIZE;
+       goto out;
+bad_mic:
+       trace_rpcgss_get_mic(task, maj_stat);
+       status = -EIO;
+       goto out;
 }
 
 static int gss_renew_cred(struct rpc_task *task)
@@ -1662,116 +1632,105 @@ gss_refresh_null(struct rpc_task *task)
        return 0;
 }
 
-static __be32 *
-gss_validate(struct rpc_task *task, __be32 *p)
+static int
+gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
 {
        struct rpc_cred *cred = task->tk_rqstp->rq_cred;
        struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-       __be32          *seq = NULL;
+       __be32          *p, *seq = NULL;
        struct kvec     iov;
        struct xdr_buf  verf_buf;
        struct xdr_netobj mic;
-       u32             flav,len;
-       u32             maj_stat;
-       __be32          *ret = ERR_PTR(-EIO);
+       u32             len, maj_stat;
+       int             status;
 
-       dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+       p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+       if (!p)
+               goto validate_failed;
+       if (*p++ != rpc_auth_gss)
+               goto validate_failed;
+       len = be32_to_cpup(p);
+       if (len > RPC_MAX_AUTH_SIZE)
+               goto validate_failed;
+       p = xdr_inline_decode(xdr, len);
+       if (!p)
+               goto validate_failed;
 
-       flav = ntohl(*p++);
-       if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE)
-               goto out_bad;
-       if (flav != RPC_AUTH_GSS)
-               goto out_bad;
        seq = kmalloc(4, GFP_NOFS);
        if (!seq)
-               goto out_bad;
-       *seq = htonl(task->tk_rqstp->rq_seqno);
+               goto validate_failed;
+       *seq = cpu_to_be32(task->tk_rqstp->rq_seqno);
        iov.iov_base = seq;
        iov.iov_len = 4;
        xdr_buf_from_iov(&iov, &verf_buf);
        mic.data = (u8 *)p;
        mic.len = len;
-
-       ret = ERR_PTR(-EACCES);
        maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
        if (maj_stat == GSS_S_CONTEXT_EXPIRED)
                clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
-       if (maj_stat) {
-               dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
-                       task->tk_pid, __func__, maj_stat);
-               goto out_bad;
-       }
+       if (maj_stat)
+               goto bad_mic;
+
        /* We leave it to unwrap to calculate au_rslack. For now we just
         * calculate the length of the verifier: */
        cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
+       status = 0;
+out:
        gss_put_ctx(ctx);
-       dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n",
-                       task->tk_pid, __func__);
-       kfree(seq);
-       return p + XDR_QUADLEN(len);
-out_bad:
-       gss_put_ctx(ctx);
-       dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
-               PTR_ERR(ret));
        kfree(seq);
-       return ret;
-}
-
-static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
-                               __be32 *p, void *obj)
-{
-       struct xdr_stream xdr;
+       return status;
 
-       xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p);
-       encode(rqstp, &xdr, obj);
+validate_failed:
+       status = -EIO;
+       goto out;
+bad_mic:
+       trace_rpcgss_verify_mic(task, maj_stat);
+       status = -EACCES;
+       goto out;
 }
 
-static inline int
-gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-                  kxdreproc_t encode, struct rpc_rqst *rqstp,
-                  __be32 *p, void *obj)
+static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+                             struct rpc_task *task, struct xdr_stream *xdr)
 {
-       struct xdr_buf  *snd_buf = &rqstp->rq_snd_buf;
-       struct xdr_buf  integ_buf;
-       __be32          *integ_len = NULL;
+       struct rpc_rqst *rqstp = task->tk_rqstp;
+       struct xdr_buf integ_buf, *snd_buf = &rqstp->rq_snd_buf;
        struct xdr_netobj mic;
-       u32             offset;
-       __be32          *q;
-       struct kvec     *iov;
-       u32             maj_stat = 0;
-       int             status = -EIO;
+       __be32 *p, *integ_len;
+       u32 offset, maj_stat;
 
+       p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+       if (!p)
+               goto wrap_failed;
        integ_len = p++;
-       offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
-       *p++ = htonl(rqstp->rq_seqno);
+       *p = cpu_to_be32(rqstp->rq_seqno);
 
-       gss_wrap_req_encode(encode, rqstp, p, obj);
+       if (rpcauth_wrap_req_encode(task, xdr))
+               goto wrap_failed;
 
+       offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
        if (xdr_buf_subsegment(snd_buf, &integ_buf,
                                offset, snd_buf->len - offset))
-               return status;
-       *integ_len = htonl(integ_buf.len);
+               goto wrap_failed;
+       *integ_len = cpu_to_be32(integ_buf.len);
 
-       /* guess whether we're in the head or the tail: */
-       if (snd_buf->page_len || snd_buf->tail[0].iov_len)
-               iov = snd_buf->tail;
-       else
-               iov = snd_buf->head;
-       p = iov->iov_base + iov->iov_len;
+       p = xdr_reserve_space(xdr, 0);
+       if (!p)
+               goto wrap_failed;
        mic.data = (u8 *)(p + 1);
-
        maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
-       status = -EIO; /* XXX? */
        if (maj_stat == GSS_S_CONTEXT_EXPIRED)
                clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
        else if (maj_stat)
-               return status;
-       q = xdr_encode_opaque(p, NULL, mic.len);
-
-       offset = (u8 *)q - (u8 *)p;
-       iov->iov_len += offset;
-       snd_buf->len += offset;
+               goto bad_mic;
+       /* Check that the trailing MIC fit in the buffer, after the fact */
+       if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
+               goto wrap_failed;
        return 0;
+wrap_failed:
+       return -EMSGSIZE;
+bad_mic:
+       trace_rpcgss_get_mic(task, maj_stat);
+       return -EIO;
 }
 
 static void
@@ -1822,61 +1781,62 @@ out:
        return -EAGAIN;
 }
 
-static inline int
-gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-                 kxdreproc_t encode, struct rpc_rqst *rqstp,
-                 __be32 *p, void *obj)
+static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+                            struct rpc_task *task, struct xdr_stream *xdr)
 {
+       struct rpc_rqst *rqstp = task->tk_rqstp;
        struct xdr_buf  *snd_buf = &rqstp->rq_snd_buf;
-       u32             offset;
-       u32             maj_stat;
+       u32             pad, offset, maj_stat;
        int             status;
-       __be32          *opaque_len;
+       __be32          *p, *opaque_len;
        struct page     **inpages;
        int             first;
-       int             pad;
        struct kvec     *iov;
-       char            *tmp;
 
+       status = -EIO;
+       p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+       if (!p)
+               goto wrap_failed;
        opaque_len = p++;
-       offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
-       *p++ = htonl(rqstp->rq_seqno);
+       *p = cpu_to_be32(rqstp->rq_seqno);
 
-       gss_wrap_req_encode(encode, rqstp, p, obj);
+       if (rpcauth_wrap_req_encode(task, xdr))
+               goto wrap_failed;
 
        status = alloc_enc_pages(rqstp);
-       if (status)
-               return status;
+       if (unlikely(status))
+               goto wrap_failed;
        first = snd_buf->page_base >> PAGE_SHIFT;
        inpages = snd_buf->pages + first;
        snd_buf->pages = rqstp->rq_enc_pages;
        snd_buf->page_base -= first << PAGE_SHIFT;
        /*
-        * Give the tail its own page, in case we need extra space in the
-        * head when wrapping:
+        * Move the tail into its own page, in case gss_wrap needs
+        * more space in the head when wrapping.
         *
-        * call_allocate() allocates twice the slack space required
-        * by the authentication flavor to rq_callsize.
-        * For GSS, slack is GSS_CRED_SLACK.
+        * Still... Why can't gss_wrap just slide the tail down?
         */
        if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
+               char *tmp;
+
                tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
                memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
                snd_buf->tail[0].iov_base = tmp;
        }
+       offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
        maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
        /* slack space should prevent this ever happening: */
-       BUG_ON(snd_buf->len > snd_buf->buflen);
-       status = -EIO;
+       if (unlikely(snd_buf->len > snd_buf->buflen))
+               goto wrap_failed;
        /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
         * done anyway, so it's safe to put the request on the wire: */
        if (maj_stat == GSS_S_CONTEXT_EXPIRED)
                clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
        else if (maj_stat)
-               return status;
+               goto bad_wrap;
 
-       *opaque_len = htonl(snd_buf->len - offset);
-       /* guess whether we're in the head or the tail: */
+       *opaque_len = cpu_to_be32(snd_buf->len - offset);
+       /* guess whether the pad goes into the head or the tail: */
        if (snd_buf->page_len || snd_buf->tail[0].iov_len)
                iov = snd_buf->tail;
        else
@@ -1888,118 +1848,154 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
        snd_buf->len += pad;
 
        return 0;
+wrap_failed:
+       return status;
+bad_wrap:
+       trace_rpcgss_wrap(task, maj_stat);
+       return -EIO;
 }
 
-static int
-gss_wrap_req(struct rpc_task *task,
-            kxdreproc_t encode, void *rqstp, __be32 *p, void *obj)
+static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
 {
        struct rpc_cred *cred = task->tk_rqstp->rq_cred;
        struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
                        gc_base);
        struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-       int             status = -EIO;
+       int status;
 
-       dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+       status = -EIO;
        if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
                /* The spec seems a little ambiguous here, but I think that not
                 * wrapping context destruction requests makes the most sense.
                 */
-               gss_wrap_req_encode(encode, rqstp, p, obj);
-               status = 0;
+               status = rpcauth_wrap_req_encode(task, xdr);
                goto out;
        }
        switch (gss_cred->gc_service) {
        case RPC_GSS_SVC_NONE:
-               gss_wrap_req_encode(encode, rqstp, p, obj);
-               status = 0;
+               status = rpcauth_wrap_req_encode(task, xdr);
                break;
        case RPC_GSS_SVC_INTEGRITY:
-               status = gss_wrap_req_integ(cred, ctx, encode, rqstp, p, obj);
+               status = gss_wrap_req_integ(cred, ctx, task, xdr);
                break;
        case RPC_GSS_SVC_PRIVACY:
-               status = gss_wrap_req_priv(cred, ctx, encode, rqstp, p, obj);
+               status = gss_wrap_req_priv(cred, ctx, task, xdr);
                break;
+       default:
+               status = -EIO;
        }
 out:
        gss_put_ctx(ctx);
-       dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status);
        return status;
 }
 
-static inline int
-gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-               struct rpc_rqst *rqstp, __be32 **p)
+static int
+gss_unwrap_resp_auth(struct rpc_cred *cred)
 {
-       struct xdr_buf  *rcv_buf = &rqstp->rq_rcv_buf;
-       struct xdr_buf integ_buf;
+       struct rpc_auth *auth = cred->cr_auth;
+
+       auth->au_rslack = auth->au_verfsize;
+       auth->au_ralign = auth->au_verfsize;
+       return 0;
+}
+
+static int
+gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
+                     struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
+                     struct xdr_stream *xdr)
+{
+       struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf;
+       u32 data_offset, mic_offset, integ_len, maj_stat;
+       struct rpc_auth *auth = cred->cr_auth;
        struct xdr_netobj mic;
-       u32 data_offset, mic_offset;
-       u32 integ_len;
-       u32 maj_stat;
-       int status = -EIO;
+       __be32 *p;
 
-       integ_len = ntohl(*(*p)++);
+       p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+       if (unlikely(!p))
+               goto unwrap_failed;
+       integ_len = be32_to_cpup(p++);
        if (integ_len & 3)
-               return status;
-       data_offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+               goto unwrap_failed;
+       data_offset = (u8 *)(p) - (u8 *)rcv_buf->head[0].iov_base;
        mic_offset = integ_len + data_offset;
        if (mic_offset > rcv_buf->len)
-               return status;
-       if (ntohl(*(*p)++) != rqstp->rq_seqno)
-               return status;
-
-       if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
-                               mic_offset - data_offset))
-               return status;
+               goto unwrap_failed;
+       if (be32_to_cpup(p) != rqstp->rq_seqno)
+               goto bad_seqno;
 
+       if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
+               goto unwrap_failed;
        if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
-               return status;
-
+               goto unwrap_failed;
        maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
        if (maj_stat == GSS_S_CONTEXT_EXPIRED)
                clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
        if (maj_stat != GSS_S_COMPLETE)
-               return status;
+               goto bad_mic;
+
+       auth->au_rslack = auth->au_verfsize + 2 + 1 + XDR_QUADLEN(mic.len);
+       auth->au_ralign = auth->au_verfsize + 2;
        return 0;
+unwrap_failed:
+       trace_rpcgss_unwrap_failed(task);
+       return -EIO;
+bad_seqno:
+       trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(p));
+       return -EIO;
+bad_mic:
+       trace_rpcgss_verify_mic(task, maj_stat);
+       return -EIO;
 }
 
-static inline int
-gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-               struct rpc_rqst *rqstp, __be32 **p)
-{
-       struct xdr_buf  *rcv_buf = &rqstp->rq_rcv_buf;
-       u32 offset;
-       u32 opaque_len;
-       u32 maj_stat;
-       int status = -EIO;
-
-       opaque_len = ntohl(*(*p)++);
-       offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+static int
+gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
+                    struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
+                    struct xdr_stream *xdr)
+{
+       struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
+       struct kvec *head = rqstp->rq_rcv_buf.head;
+       struct rpc_auth *auth = cred->cr_auth;
+       unsigned int savedlen = rcv_buf->len;
+       u32 offset, opaque_len, maj_stat;
+       __be32 *p;
+
+       p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+       if (unlikely(!p))
+               goto unwrap_failed;
+       opaque_len = be32_to_cpup(p++);
+       offset = (u8 *)(p) - (u8 *)head->iov_base;
        if (offset + opaque_len > rcv_buf->len)
-               return status;
-       /* remove padding: */
+               goto unwrap_failed;
        rcv_buf->len = offset + opaque_len;
 
        maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
        if (maj_stat == GSS_S_CONTEXT_EXPIRED)
                clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
        if (maj_stat != GSS_S_COMPLETE)
-               return status;
-       if (ntohl(*(*p)++) != rqstp->rq_seqno)
-               return status;
+               goto bad_unwrap;
+       /* gss_unwrap decrypted the sequence number */
+       if (be32_to_cpup(p++) != rqstp->rq_seqno)
+               goto bad_seqno;
 
-       return 0;
-}
-
-static int
-gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
-                     __be32 *p, void *obj)
-{
-       struct xdr_stream xdr;
+       /* gss_unwrap redacts the opaque blob from the head iovec.
+        * rcv_buf has changed, thus the stream needs to be reset.
+        */
+       xdr_init_decode(xdr, rcv_buf, p, rqstp);
 
-       xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-       return decode(rqstp, &xdr, obj);
+       auth->au_rslack = auth->au_verfsize + 2 +
+                         XDR_QUADLEN(savedlen - rcv_buf->len);
+       auth->au_ralign = auth->au_verfsize + 2 +
+                         XDR_QUADLEN(savedlen - rcv_buf->len);
+       return 0;
+unwrap_failed:
+       trace_rpcgss_unwrap_failed(task);
+       return -EIO;
+bad_seqno:
+       trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(--p));
+       return -EIO;
+bad_unwrap:
+       trace_rpcgss_unwrap(task, maj_stat);
+       return -EIO;
 }
 
 static bool
@@ -2014,14 +2010,14 @@ gss_xmit_need_reencode(struct rpc_task *task)
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_cred *cred = req->rq_cred;
        struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-       u32 win, seq_xmit;
+       u32 win, seq_xmit = 0;
        bool ret = true;
 
        if (!ctx)
-               return true;
+               goto out;
 
        if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq)))
-               goto out;
+               goto out_ctx;
 
        seq_xmit = READ_ONCE(ctx->gc_seq_xmit);
        while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) {
@@ -2030,56 +2026,51 @@ gss_xmit_need_reencode(struct rpc_task *task)
                seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno);
                if (seq_xmit == tmp) {
                        ret = false;
-                       goto out;
+                       goto out_ctx;
                }
        }
 
        win = ctx->gc_win;
        if (win > 0)
                ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win);
-out:
+
+out_ctx:
        gss_put_ctx(ctx);
+out:
+       trace_rpcgss_need_reencode(task, seq_xmit, ret);
        return ret;
 }
 
 static int
-gss_unwrap_resp(struct rpc_task *task,
-               kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
+gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
 {
-       struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+       struct rpc_rqst *rqstp = task->tk_rqstp;
+       struct rpc_cred *cred = rqstp->rq_cred;
        struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
                        gc_base);
        struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-       __be32          *savedp = p;
-       struct kvec     *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head;
-       int             savedlen = head->iov_len;
-       int             status = -EIO;
+       int status = -EIO;
 
        if (ctx->gc_proc != RPC_GSS_PROC_DATA)
                goto out_decode;
        switch (gss_cred->gc_service) {
        case RPC_GSS_SVC_NONE:
+               status = gss_unwrap_resp_auth(cred);
                break;
        case RPC_GSS_SVC_INTEGRITY:
-               status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
-               if (status)
-                       goto out;
+               status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr);
                break;
        case RPC_GSS_SVC_PRIVACY:
-               status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p);
-               if (status)
-                       goto out;
+               status = gss_unwrap_resp_priv(task, cred, ctx, rqstp, xdr);
                break;
        }
-       /* take into account extra slack for integrity and privacy cases: */
-       cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
-                                               + (savedlen - head->iov_len);
+       if (status)
+               goto out;
+
 out_decode:
-       status = gss_unwrap_req_decode(decode, rqstp, p, obj);
+       status = rpcauth_unwrap_resp_decode(task, xdr);
 out:
        gss_put_ctx(ctx);
-       dprintk("RPC: %5u %s returning %d\n",
-               task->tk_pid, __func__, status);
        return status;
 }
 
index eab71fc7af3e00ff72a339c1f3d7f1e910358e89..56cc85c5bc06d690fcdb62f9bc399e424cb83a33 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
 /*
  *  linux/net/sunrpc/gss_krb5_mech.c
  *
@@ -6,32 +7,6 @@
  *
  *  Andy Adamson <andros@umich.edu>
  *  J. Bruce Fields <bfields@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <crypto/hash.h>
@@ -53,6 +28,7 @@
 static struct gss_api_mech gss_kerberos_mech;  /* forward declaration */
 
 static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
+#ifndef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
        /*
         * DES (All DES enctypes are mapped to the same gss functionality)
         */
@@ -74,6 +50,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
          .cksumlength = 8,
          .keyed_cksum = 0,
        },
+#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
        /*
         * RC4-HMAC
         */
index 5cdde6cb703a423ff48682f86e5275e331bbe242..14a0aff0cd84c644ac9e1bf5e8444630aaec7b55 100644 (file)
@@ -570,14 +570,16 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
         */
        movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
        movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
-       BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
-                                                       buf->head[0].iov_len);
+       if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
+           buf->head[0].iov_len)
+               return GSS_S_FAILURE;
        memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
        buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
        buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
 
        /* Trim off the trailing "extra count" and checksum blob */
-       xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
+       buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip;
+
        return GSS_S_COMPLETE;
 }
 
index 379318dff5347325365d5a3ecc436c339f9817fe..82060099a429159a1801be76282478a7746b686f 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
 /*
  *  linux/net/sunrpc/gss_mech_switch.c
  *
@@ -5,32 +6,6 @@
  *  All rights reserved.
  *
  *  J. Bruce Fields   <bfields@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  */
 
 #include <linux/types.h>
index 73dcda060335fc1a52105b399a65f20bf12f4d6e..0349f455a8624fbb6c68e0da2385bf4c5e6fb539 100644 (file)
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  *  linux/net/sunrpc/gss_rpc_upcall.c
  *
  *  Copyright (C) 2012 Simo Sorce <simo@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/types.h>
index 1e542aded90a10cb20d78a23b4327a92a0806ae9..31e96344167e32669f25365a5f1afe111448731f 100644 (file)
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  *  linux/net/sunrpc/gss_rpc_upcall.h
  *
  *  Copyright (C) 2012 Simo Sorce <simo@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef _GSS_RPC_UPCALL_H
@@ -45,4 +32,5 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data);
 void init_gssp_clnt(struct sunrpc_net *);
 int set_gssp_clnt(struct net *);
 void clear_gssp_clnt(struct sunrpc_net *);
+
 #endif /* _GSS_RPC_UPCALL_H */
index 006062ad5f583adf07c30f13f1310ea85a3d729b..2ff7b7083ebab9dee3d4d1196be181cb39ba6c11 100644 (file)
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * GSS Proxy upcall module
  *
  *  Copyright (C) 2012 Simo Sorce <simo@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/sunrpc/svcauth.h>
index 146c310329173b92ec9c8319785632e26519dffd..3f17411b7e65e98285995f979e456352c3bd114b 100644 (file)
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * GSS Proxy upcall module
  *
  *  Copyright (C) 2012 Simo Sorce <simo@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef _LINUX_GSS_RPC_XDR_H
@@ -262,6 +249,4 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
 #define GSSX_ARG_wrap_size_limit_sz 0
 #define GSSX_RES_wrap_size_limit_sz 0
 
-
-
 #endif /* _LINUX_GSS_RPC_XDR_H */
index 152790ed309c6f2cb10df426afbf3f9f603025c1..0c5d7896d6dd66eacb23dc1b864abb2592910123 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Neil Brown <neilb@cse.unsw.edu.au>
  * J. Bruce Fields <bfields@umich.edu>
@@ -896,7 +897,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
        if (svc_getnl(&buf->head[0]) != seq)
                goto out;
        /* trim off the mic and padding at the end before returning */
-       xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
+       buf->len -= 4 + round_up_to_quad(mic.len);
        stat = 0;
 out:
        kfree(mic.data);
diff --git a/net/sunrpc/auth_gss/trace.c b/net/sunrpc/auth_gss/trace.c
new file mode 100644 (file)
index 0000000..5576f1e
--- /dev/null
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, 2019 Oracle. All rights reserved.
+ */
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/gss_err.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/rpcgss.h>
index d0ceac57c06e61e5cfe69d31a91046fc2b7bb109..41a633a4049e822a9bc097686d5415005c8fa079 100644 (file)
@@ -59,15 +59,21 @@ nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
 /*
  * Marshal credential.
  */
-static __be32 *
-nul_marshal(struct rpc_task *task, __be32 *p)
+static int
+nul_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 {
-       *p++ = htonl(RPC_AUTH_NULL);
-       *p++ = 0;
-       *p++ = htonl(RPC_AUTH_NULL);
-       *p++ = 0;
-
-       return p;
+       __be32 *p;
+
+       p = xdr_reserve_space(xdr, 4 * sizeof(*p));
+       if (!p)
+               return -EMSGSIZE;
+       /* Credential */
+       *p++ = rpc_auth_null;
+       *p++ = xdr_zero;
+       /* Verifier */
+       *p++ = rpc_auth_null;
+       *p   = xdr_zero;
+       return 0;
 }
 
 /*
@@ -80,25 +86,19 @@ nul_refresh(struct rpc_task *task)
        return 0;
 }
 
-static __be32 *
-nul_validate(struct rpc_task *task, __be32 *p)
+static int
+nul_validate(struct rpc_task *task, struct xdr_stream *xdr)
 {
-       rpc_authflavor_t        flavor;
-       u32                     size;
-
-       flavor = ntohl(*p++);
-       if (flavor != RPC_AUTH_NULL) {
-               printk("RPC: bad verf flavor: %u\n", flavor);
-               return ERR_PTR(-EIO);
-       }
-
-       size = ntohl(*p++);
-       if (size != 0) {
-               printk("RPC: bad verf size: %u\n", size);
-               return ERR_PTR(-EIO);
-       }
-
-       return p;
+       __be32 *p;
+
+       p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+       if (!p)
+               return -EIO;
+       if (*p++ != rpc_auth_null)
+               return -EIO;
+       if (*p != xdr_zero)
+               return -EIO;
+       return 0;
 }
 
 const struct rpc_authops authnull_ops = {
@@ -114,6 +114,8 @@ static
 struct rpc_auth null_auth = {
        .au_cslack      = NUL_CALLSLACK,
        .au_rslack      = NUL_REPLYSLACK,
+       .au_verfsize    = NUL_REPLYSLACK,
+       .au_ralign      = NUL_REPLYSLACK,
        .au_ops         = &authnull_ops,
        .au_flavor      = RPC_AUTH_NULL,
        .au_count       = REFCOUNT_INIT(1),
@@ -125,8 +127,10 @@ const struct rpc_credops null_credops = {
        .crdestroy      = nul_destroy_cred,
        .crmatch        = nul_match,
        .crmarshal      = nul_marshal,
+       .crwrap_req     = rpcauth_wrap_req_encode,
        .crrefresh      = nul_refresh,
        .crvalidate     = nul_validate,
+       .crunwrap_resp  = rpcauth_unwrap_resp_decode,
 };
 
 static
index 387f6b3ffbeafa79db312e0745af5f9ebd9c21ca..d4018e5a24c52caf34b12deabb697d9ae676a613 100644 (file)
@@ -28,8 +28,6 @@ static mempool_t              *unix_pool;
 static struct rpc_auth *
 unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
-       dprintk("RPC:       creating UNIX authenticator for client %p\n",
-                       clnt);
        refcount_inc(&unix_auth.au_count);
        return &unix_auth;
 }
@@ -37,7 +35,6 @@ unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 static void
 unx_destroy(struct rpc_auth *auth)
 {
-       dprintk("RPC:       destroying UNIX authenticator %p\n", auth);
 }
 
 /*
@@ -48,10 +45,6 @@ unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 {
        struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS);
 
-       dprintk("RPC:       allocating UNIX cred for uid %d gid %d\n",
-                       from_kuid(&init_user_ns, acred->cred->fsuid),
-                       from_kgid(&init_user_ns, acred->cred->fsgid));
-
        rpcauth_init_cred(ret, acred, auth, &unix_credops);
        ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
        return ret;
@@ -61,7 +54,7 @@ static void
 unx_free_cred_callback(struct rcu_head *head)
 {
        struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu);
-       dprintk("RPC:       unx_free_cred %p\n", rpc_cred);
+
        put_cred(rpc_cred->cr_cred);
        mempool_free(rpc_cred, unix_pool);
 }
@@ -87,7 +80,7 @@ unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
        if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid))
                return 0;
 
-       if (acred->cred && acred->cred->group_info != NULL)
+       if (acred->cred->group_info != NULL)
                groups = acred->cred->group_info->ngroups;
        if (groups > UNX_NGROUPS)
                groups = UNX_NGROUPS;
@@ -106,37 +99,55 @@ unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
  * Marshal credentials.
  * Maybe we should keep a cached credential for performance reasons.
  */
-static __be32 *
-unx_marshal(struct rpc_task *task, __be32 *p)
+static int
+unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 {
        struct rpc_clnt *clnt = task->tk_client;
        struct rpc_cred *cred = task->tk_rqstp->rq_cred;
-       __be32          *base, *hold;
+       __be32          *p, *cred_len, *gidarr_len;
        int             i;
        struct group_info *gi = cred->cr_cred->group_info;
 
-       *p++ = htonl(RPC_AUTH_UNIX);
-       base = p++;
-       *p++ = htonl(jiffies/HZ);
-
-       /*
-        * Copy the UTS nodename captured when the client was created.
-        */
-       p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
-
-       *p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid));
-       *p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid));
-       hold = p++;
+       /* Credential */
+
+       p = xdr_reserve_space(xdr, 3 * sizeof(*p));
+       if (!p)
+               goto marshal_failed;
+       *p++ = rpc_auth_unix;
+       cred_len = p++;
+       *p++ = xdr_zero;        /* stamp */
+       if (xdr_stream_encode_opaque(xdr, clnt->cl_nodename,
+                                    clnt->cl_nodelen) < 0)
+               goto marshal_failed;
+       p = xdr_reserve_space(xdr, 3 * sizeof(*p));
+       if (!p)
+               goto marshal_failed;
+       *p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid));
+       *p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid));
+
+       gidarr_len = p++;
        if (gi)
                for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
-                       *p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i]));
-       *hold = htonl(p - hold - 1);            /* gid array length */
-       *base = htonl((p - base - 1) << 2);     /* cred length */
+                       *p++ = cpu_to_be32(from_kgid(&init_user_ns,
+                                                    gi->gid[i]));
+       *gidarr_len = cpu_to_be32(p - gidarr_len - 1);
+       *cred_len = cpu_to_be32((p - cred_len - 1) << 2);
+       p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2);
+       if (!p)
+               goto marshal_failed;
+
+       /* Verifier */
+
+       p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+       if (!p)
+               goto marshal_failed;
+       *p++ = rpc_auth_null;
+       *p   = xdr_zero;
 
-       *p++ = htonl(RPC_AUTH_NULL);
-       *p++ = htonl(0);
+       return 0;
 
-       return p;
+marshal_failed:
+       return -EMSGSIZE;
 }
 
 /*
@@ -149,29 +160,35 @@ unx_refresh(struct rpc_task *task)
        return 0;
 }
 
-static __be32 *
-unx_validate(struct rpc_task *task, __be32 *p)
+static int
+unx_validate(struct rpc_task *task, struct xdr_stream *xdr)
 {
-       rpc_authflavor_t        flavor;
-       u32                     size;
-
-       flavor = ntohl(*p++);
-       if (flavor != RPC_AUTH_NULL &&
-           flavor != RPC_AUTH_UNIX &&
-           flavor != RPC_AUTH_SHORT) {
-               printk("RPC: bad verf flavor: %u\n", flavor);
-               return ERR_PTR(-EIO);
-       }
-
-       size = ntohl(*p++);
-       if (size > RPC_MAX_AUTH_SIZE) {
-               printk("RPC: giant verf size: %u\n", size);
-               return ERR_PTR(-EIO);
+       struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth;
+       __be32 *p;
+       u32 size;
+
+       p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+       if (!p)
+               return -EIO;
+       switch (*p++) {
+       case rpc_auth_null:
+       case rpc_auth_unix:
+       case rpc_auth_short:
+               break;
+       default:
+               return -EIO;
        }
-       task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
-       p += (size >> 2);
-
-       return p;
+       size = be32_to_cpup(p);
+       if (size > RPC_MAX_AUTH_SIZE)
+               return -EIO;
+       p = xdr_inline_decode(xdr, size);
+       if (!p)
+               return -EIO;
+
+       auth->au_verfsize = XDR_QUADLEN(size) + 2;
+       auth->au_rslack = XDR_QUADLEN(size) + 2;
+       auth->au_ralign = XDR_QUADLEN(size) + 2;
+       return 0;
 }
 
 int __init rpc_init_authunix(void)
@@ -198,6 +215,7 @@ static
 struct rpc_auth                unix_auth = {
        .au_cslack      = UNX_CALLSLACK,
        .au_rslack      = NUL_REPLYSLACK,
+       .au_verfsize    = NUL_REPLYSLACK,
        .au_ops         = &authunix_ops,
        .au_flavor      = RPC_AUTH_UNIX,
        .au_count       = REFCOUNT_INIT(1),
@@ -209,6 +227,8 @@ const struct rpc_credops unix_credops = {
        .crdestroy      = unx_destroy_cred,
        .crmatch        = unx_match,
        .crmarshal      = unx_marshal,
+       .crwrap_req     = rpcauth_wrap_req_encode,
        .crrefresh      = unx_refresh,
        .crvalidate     = unx_validate,
+       .crunwrap_resp  = rpcauth_unwrap_resp_decode,
 };
index ec451b8114b0b07dbe5db102af8002d58b7804cd..c47d82622fd12129048215190b19fcb82721f78f 100644 (file)
@@ -235,7 +235,8 @@ out:
                list_empty(&xprt->bc_pa_list) ? "true" : "false");
 }
 
-static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
+static struct rpc_rqst *xprt_get_bc_request(struct rpc_xprt *xprt, __be32 xid,
+               struct rpc_rqst *new)
 {
        struct rpc_rqst *req = NULL;
 
@@ -243,22 +244,20 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
        if (atomic_read(&xprt->bc_free_slots) <= 0)
                goto not_found;
        if (list_empty(&xprt->bc_pa_list)) {
-               req = xprt_alloc_bc_req(xprt, GFP_ATOMIC);
-               if (!req)
+               if (!new)
                        goto not_found;
-               list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
+               list_add_tail(&new->rq_bc_pa_list, &xprt->bc_pa_list);
                xprt->bc_alloc_count++;
        }
        req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
                                rq_bc_pa_list);
        req->rq_reply_bytes_recvd = 0;
-       req->rq_bytes_sent = 0;
        memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
                        sizeof(req->rq_private_buf));
        req->rq_xid = xid;
        req->rq_connect_cookie = xprt->connect_cookie;
-not_found:
        dprintk("RPC:       backchannel req=%p\n", req);
+not_found:
        return req;
 }
 
@@ -321,18 +320,27 @@ void xprt_free_bc_rqst(struct rpc_rqst *req)
  */
 struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid)
 {
-       struct rpc_rqst *req;
-
-       spin_lock(&xprt->bc_pa_lock);
-       list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
-               if (req->rq_connect_cookie != xprt->connect_cookie)
-                       continue;
-               if (req->rq_xid == xid)
-                       goto found;
-       }
-       req = xprt_alloc_bc_request(xprt, xid);
+       struct rpc_rqst *req, *new = NULL;
+
+       do {
+               spin_lock(&xprt->bc_pa_lock);
+               list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
+                       if (req->rq_connect_cookie != xprt->connect_cookie)
+                               continue;
+                       if (req->rq_xid == xid)
+                               goto found;
+               }
+               req = xprt_get_bc_request(xprt, xid, new);
 found:
-       spin_unlock(&xprt->bc_pa_lock);
+               spin_unlock(&xprt->bc_pa_lock);
+               if (new) {
+                       if (req != new)
+                               xprt_free_bc_rqst(new);
+                       break;
+               } else if (req)
+                       break;
+               new = xprt_alloc_bc_req(xprt, GFP_KERNEL);
+       } while (new);
        return req;
 }
 
index d7ec6132c046ec409db057d6ace332ab620fa88b..4216fe33204a8e3f2634cae5ed2329335b1885da 100644 (file)
@@ -66,20 +66,19 @@ static void call_decode(struct rpc_task *task);
 static void    call_bind(struct rpc_task *task);
 static void    call_bind_status(struct rpc_task *task);
 static void    call_transmit(struct rpc_task *task);
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-static void    call_bc_transmit(struct rpc_task *task);
-#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 static void    call_status(struct rpc_task *task);
 static void    call_transmit_status(struct rpc_task *task);
 static void    call_refresh(struct rpc_task *task);
 static void    call_refreshresult(struct rpc_task *task);
-static void    call_timeout(struct rpc_task *task);
 static void    call_connect(struct rpc_task *task);
 static void    call_connect_status(struct rpc_task *task);
 
-static __be32  *rpc_encode_header(struct rpc_task *task);
-static __be32  *rpc_verify_header(struct rpc_task *task);
+static int     rpc_encode_header(struct rpc_task *task,
+                                 struct xdr_stream *xdr);
+static int     rpc_decode_header(struct rpc_task *task,
+                                 struct xdr_stream *xdr);
 static int     rpc_ping(struct rpc_clnt *clnt);
+static void    rpc_check_timeout(struct rpc_task *task);
 
 static void rpc_register_client(struct rpc_clnt *clnt)
 {
@@ -834,9 +833,6 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
                if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
                        rovr->tk_flags |= RPC_TASK_KILLED;
                        rpc_exit(rovr, -EIO);
-                       if (RPC_IS_QUEUED(rovr))
-                               rpc_wake_up_queued_task(rovr->tk_waitqueue,
-                                                       rovr);
                }
        }
        spin_unlock(&clnt->cl_lock);
@@ -1131,6 +1127,8 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags,
 EXPORT_SYMBOL_GPL(rpc_call_async);
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static void call_bc_encode(struct rpc_task *task);
+
 /**
  * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
  * rpc_execute against it
@@ -1152,7 +1150,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
        task = rpc_new_task(&task_setup_data);
        xprt_init_bc_request(req, task);
 
-       task->tk_action = call_bc_transmit;
+       task->tk_action = call_bc_encode;
        atomic_inc(&task->tk_count);
        WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
        rpc_execute(task);
@@ -1162,6 +1160,29 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
+/**
+ * rpc_prepare_reply_pages - Prepare to receive a reply data payload into pages
+ * @req: RPC request to prepare
+ * @pages: vector of struct page pointers
+ * @base: offset in first page where receive should start, in bytes
+ * @len: expected size of the upper layer data payload, in bytes
+ * @hdrsize: expected size of upper layer reply header, in XDR words
+ *
+ */
+void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
+                            unsigned int base, unsigned int len,
+                            unsigned int hdrsize)
+{
+       /* Subtract one to force an extra word of buffer space for the
+        * payload's XDR pad to fall into the rcv_buf's tail iovec.
+        */
+       hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign - 1;
+
+       xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
+       trace_rpc_reply_pages(req);
+}
+EXPORT_SYMBOL_GPL(rpc_prepare_reply_pages);
+
 void
 rpc_call_start(struct rpc_task *task)
 {
@@ -1519,6 +1540,7 @@ call_start(struct rpc_task *task)
        clnt->cl_stats->rpccnt++;
        task->tk_action = call_reserve;
        rpc_task_set_transport(task, clnt);
+       call_reserve(task);
 }
 
 /*
@@ -1532,6 +1554,9 @@ call_reserve(struct rpc_task *task)
        task->tk_status  = 0;
        task->tk_action  = call_reserveresult;
        xprt_reserve(task);
+       if (rpc_task_need_resched(task))
+               return;
+        call_reserveresult(task);
 }
 
 static void call_retry_reserve(struct rpc_task *task);
@@ -1554,6 +1579,7 @@ call_reserveresult(struct rpc_task *task)
        if (status >= 0) {
                if (task->tk_rqstp) {
                        task->tk_action = call_refresh;
+                       call_refresh(task);
                        return;
                }
 
@@ -1579,6 +1605,7 @@ call_reserveresult(struct rpc_task *task)
                /* fall through */
        case -EAGAIN:   /* woken up; retry */
                task->tk_action = call_retry_reserve;
+               call_retry_reserve(task);
                return;
        case -EIO:      /* probably a shutdown */
                break;
@@ -1601,6 +1628,9 @@ call_retry_reserve(struct rpc_task *task)
        task->tk_status  = 0;
        task->tk_action  = call_reserveresult;
        xprt_retry_reserve(task);
+       if (rpc_task_need_resched(task))
+               return;
+       call_reserveresult(task);
 }
 
 /*
@@ -1615,6 +1645,9 @@ call_refresh(struct rpc_task *task)
        task->tk_status = 0;
        task->tk_client->cl_stats->rpcauthrefresh++;
        rpcauth_refreshcred(task);
+       if (rpc_task_need_resched(task))
+               return;
+       call_refreshresult(task);
 }
 
 /*
@@ -1633,6 +1666,7 @@ call_refreshresult(struct rpc_task *task)
        case 0:
                if (rpcauth_uptodatecred(task)) {
                        task->tk_action = call_allocate;
+                       call_allocate(task);
                        return;
                }
                /* Use rate-limiting and a max number of retries if refresh
@@ -1651,6 +1685,7 @@ call_refreshresult(struct rpc_task *task)
                task->tk_cred_retry--;
                dprintk("RPC: %5u %s: retry refresh creds\n",
                                task->tk_pid, __func__);
+               call_refresh(task);
                return;
        }
        dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
@@ -1665,7 +1700,7 @@ call_refreshresult(struct rpc_task *task)
 static void
 call_allocate(struct rpc_task *task)
 {
-       unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack;
+       const struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth;
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_xprt *xprt = req->rq_xprt;
        const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
@@ -1676,8 +1711,10 @@ call_allocate(struct rpc_task *task)
        task->tk_status = 0;
        task->tk_action = call_encode;
 
-       if (req->rq_buffer)
+       if (req->rq_buffer) {
+               call_encode(task);
                return;
+       }
 
        if (proc->p_proc != 0) {
                BUG_ON(proc->p_arglen == 0);
@@ -1690,15 +1727,20 @@ call_allocate(struct rpc_task *task)
         * and reply headers, and convert both values
         * to byte sizes.
         */
-       req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen;
+       req->rq_callsize = RPC_CALLHDRSIZE + (auth->au_cslack << 1) +
+                          proc->p_arglen;
        req->rq_callsize <<= 2;
-       req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen;
+       req->rq_rcvsize = RPC_REPHDRSIZE + auth->au_rslack + proc->p_replen;
        req->rq_rcvsize <<= 2;
 
        status = xprt->ops->buf_alloc(task);
        xprt_inject_disconnect(xprt);
-       if (status == 0)
+       if (status == 0) {
+               if (rpc_task_need_resched(task))
+                       return;
+               call_encode(task);
                return;
+       }
        if (status != -ENOMEM) {
                rpc_exit(task, status);
                return;
@@ -1728,10 +1770,7 @@ static void
 rpc_xdr_encode(struct rpc_task *task)
 {
        struct rpc_rqst *req = task->tk_rqstp;
-       kxdreproc_t     encode;
-       __be32          *p;
-
-       dprint_status(task);
+       struct xdr_stream xdr;
 
        xdr_buf_init(&req->rq_snd_buf,
                     req->rq_buffer,
@@ -1740,18 +1779,13 @@ rpc_xdr_encode(struct rpc_task *task)
                     req->rq_rbuffer,
                     req->rq_rcvsize);
 
-       p = rpc_encode_header(task);
-       if (p == NULL)
-               return;
-
-       encode = task->tk_msg.rpc_proc->p_encode;
-       if (encode == NULL)
+       req->rq_snd_buf.head[0].iov_len = 0;
+       xdr_init_encode(&xdr, &req->rq_snd_buf,
+                       req->rq_snd_buf.head[0].iov_base, req);
+       if (rpc_encode_header(task, &xdr))
                return;
 
-       task->tk_status = rpcauth_wrap_req(task, encode, req, p,
-                       task->tk_msg.rpc_argp);
-       if (task->tk_status == 0)
-               xprt_request_prepare(req);
+       task->tk_status = rpcauth_wrap_req(task, &xdr);
 }
 
 /*
@@ -1762,6 +1796,7 @@ call_encode(struct rpc_task *task)
 {
        if (!rpc_task_need_encode(task))
                goto out;
+       dprint_status(task);
        /* Encode here so that rpcsec_gss can use correct sequence number. */
        rpc_xdr_encode(task);
        /* Did the encode result in an error condition? */
@@ -1779,6 +1814,8 @@ call_encode(struct rpc_task *task)
                        rpc_exit(task, task->tk_status);
                }
                return;
+       } else {
+               xprt_request_prepare(task->tk_rqstp);
        }
 
        /* Add task to reply queue before transmission to avoid races */
@@ -1787,6 +1824,25 @@ call_encode(struct rpc_task *task)
        xprt_request_enqueue_transmit(task);
 out:
        task->tk_action = call_bind;
+       call_bind(task);
+}
+
+/*
+ * Helpers to check if the task was already transmitted, and
+ * to take action when that is the case.
+ */
+static bool
+rpc_task_transmitted(struct rpc_task *task)
+{
+       return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+}
+
+static void
+rpc_task_handle_transmitted(struct rpc_task *task)
+{
+       xprt_end_transmit(task);
+       task->tk_action = call_transmit_status;
+       call_transmit_status(task);
 }
 
 /*
@@ -1797,14 +1853,25 @@ call_bind(struct rpc_task *task)
 {
        struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
 
-       dprint_status(task);
+       if (rpc_task_transmitted(task)) {
+               rpc_task_handle_transmitted(task);
+               return;
+       }
 
-       task->tk_action = call_connect;
-       if (!xprt_bound(xprt)) {
-               task->tk_action = call_bind_status;
-               task->tk_timeout = xprt->bind_timeout;
-               xprt->ops->rpcbind(task);
+       if (xprt_bound(xprt)) {
+               task->tk_action = call_connect;
+               call_connect(task);
+               return;
        }
+
+       dprint_status(task);
+
+       task->tk_action = call_bind_status;
+       if (!xprt_prepare_transmit(task))
+               return;
+
+       task->tk_timeout = xprt->bind_timeout;
+       xprt->ops->rpcbind(task);
 }
 
 /*
@@ -1815,10 +1882,16 @@ call_bind_status(struct rpc_task *task)
 {
        int status = -EIO;
 
+       if (rpc_task_transmitted(task)) {
+               rpc_task_handle_transmitted(task);
+               return;
+       }
+
        if (task->tk_status >= 0) {
                dprint_status(task);
                task->tk_status = 0;
                task->tk_action = call_connect;
+               call_connect(task);
                return;
        }
 
@@ -1841,6 +1914,8 @@ call_bind_status(struct rpc_task *task)
                task->tk_rebind_retry--;
                rpc_delay(task, 3*HZ);
                goto retry_timeout;
+       case -EAGAIN:
+               goto retry_timeout;
        case -ETIMEDOUT:
                dprintk("RPC: %5u rpcbind request timed out\n",
                                task->tk_pid);
@@ -1882,7 +1957,8 @@ call_bind_status(struct rpc_task *task)
 
 retry_timeout:
        task->tk_status = 0;
-       task->tk_action = call_timeout;
+       task->tk_action = call_bind;
+       rpc_check_timeout(task);
 }
 
 /*
@@ -1893,21 +1969,31 @@ call_connect(struct rpc_task *task)
 {
        struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
 
+       if (rpc_task_transmitted(task)) {
+               rpc_task_handle_transmitted(task);
+               return;
+       }
+
+       if (xprt_connected(xprt)) {
+               task->tk_action = call_transmit;
+               call_transmit(task);
+               return;
+       }
+
        dprintk("RPC: %5u call_connect xprt %p %s connected\n",
                        task->tk_pid, xprt,
                        (xprt_connected(xprt) ? "is" : "is not"));
 
-       task->tk_action = call_transmit;
-       if (!xprt_connected(xprt)) {
-               task->tk_action = call_connect_status;
-               if (task->tk_status < 0)
-                       return;
-               if (task->tk_flags & RPC_TASK_NOCONNECT) {
-                       rpc_exit(task, -ENOTCONN);
-                       return;
-               }
-               xprt_connect(task);
+       task->tk_action = call_connect_status;
+       if (task->tk_status < 0)
+               return;
+       if (task->tk_flags & RPC_TASK_NOCONNECT) {
+               rpc_exit(task, -ENOTCONN);
+               return;
        }
+       if (!xprt_prepare_transmit(task))
+               return;
+       xprt_connect(task);
 }
 
 /*
@@ -1919,10 +2005,8 @@ call_connect_status(struct rpc_task *task)
        struct rpc_clnt *clnt = task->tk_client;
        int status = task->tk_status;
 
-       /* Check if the task was already transmitted */
-       if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
-               xprt_end_transmit(task);
-               task->tk_action = call_transmit_status;
+       if (rpc_task_transmitted(task)) {
+               rpc_task_handle_transmitted(task);
                return;
        }
 
@@ -1937,8 +2021,7 @@ call_connect_status(struct rpc_task *task)
                        break;
                if (clnt->cl_autobind) {
                        rpc_force_rebind(clnt);
-                       task->tk_action = call_bind;
-                       return;
+                       goto out_retry;
                }
                /* fall through */
        case -ECONNRESET:
@@ -1958,16 +2041,20 @@ call_connect_status(struct rpc_task *task)
                /* fall through */
        case -ENOTCONN:
        case -EAGAIN:
-               /* Check for timeouts before looping back to call_bind */
        case -ETIMEDOUT:
-               task->tk_action = call_timeout;
-               return;
+               goto out_retry;
        case 0:
                clnt->cl_stats->netreconn++;
                task->tk_action = call_transmit;
+               call_transmit(task);
                return;
        }
        rpc_exit(task, status);
+       return;
+out_retry:
+       /* Check for timeouts before looping back to call_bind */
+       task->tk_action = call_bind;
+       rpc_check_timeout(task);
 }
 
 /*
@@ -1976,16 +2063,28 @@ call_connect_status(struct rpc_task *task)
 static void
 call_transmit(struct rpc_task *task)
 {
+       if (rpc_task_transmitted(task)) {
+               rpc_task_handle_transmitted(task);
+               return;
+       }
+
        dprint_status(task);
 
+       task->tk_action = call_transmit_status;
+       if (!xprt_prepare_transmit(task))
+               return;
        task->tk_status = 0;
        if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
-               if (!xprt_prepare_transmit(task))
+               if (!xprt_connected(task->tk_xprt)) {
+                       task->tk_status = -ENOTCONN;
                        return;
+               }
                xprt_transmit(task);
        }
-       task->tk_action = call_transmit_status;
        xprt_end_transmit(task);
+       if (rpc_task_need_resched(task))
+               return;
+       call_transmit_status(task);
 }
 
 /*
@@ -2000,8 +2099,12 @@ call_transmit_status(struct rpc_task *task)
         * Common case: success.  Force the compiler to put this
         * test first.
         */
-       if (task->tk_status == 0) {
-               xprt_request_wait_receive(task);
+       if (rpc_task_transmitted(task)) {
+               if (task->tk_status == 0)
+                       xprt_request_wait_receive(task);
+               if (rpc_task_need_resched(task))
+                       return;
+               call_status(task);
                return;
        }
 
@@ -2038,7 +2141,7 @@ call_transmit_status(struct rpc_task *task)
                                trace_xprt_ping(task->tk_xprt,
                                                task->tk_status);
                        rpc_exit(task, task->tk_status);
-                       break;
+                       return;
                }
                /* fall through */
        case -ECONNRESET:
@@ -2046,11 +2149,25 @@ call_transmit_status(struct rpc_task *task)
        case -EADDRINUSE:
        case -ENOTCONN:
        case -EPIPE:
+               task->tk_action = call_bind;
+               task->tk_status = 0;
                break;
        }
+       rpc_check_timeout(task);
 }
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static void call_bc_transmit(struct rpc_task *task);
+static void call_bc_transmit_status(struct rpc_task *task);
+
+static void
+call_bc_encode(struct rpc_task *task)
+{
+       xprt_request_enqueue_transmit(task);
+       task->tk_action = call_bc_transmit;
+       call_bc_transmit(task);
+}
+
 /*
  * 5b. Send the backchannel RPC reply.  On error, drop the reply.  In
  * addition, disconnect on connectivity errors.
@@ -2058,26 +2175,23 @@ call_transmit_status(struct rpc_task *task)
 static void
 call_bc_transmit(struct rpc_task *task)
 {
-       struct rpc_rqst *req = task->tk_rqstp;
-
-       if (rpc_task_need_encode(task))
-               xprt_request_enqueue_transmit(task);
-       if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
-               goto out_wakeup;
-
-       if (!xprt_prepare_transmit(task))
-               goto out_retry;
-
-       if (task->tk_status < 0) {
-               printk(KERN_NOTICE "RPC: Could not send backchannel reply "
-                       "error: %d\n", task->tk_status);
-               goto out_done;
+       task->tk_action = call_bc_transmit_status;
+       if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+               if (!xprt_prepare_transmit(task))
+                       return;
+               task->tk_status = 0;
+               xprt_transmit(task);
        }
+       xprt_end_transmit(task);
+}
 
-       xprt_transmit(task);
+static void
+call_bc_transmit_status(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
 
-       xprt_end_transmit(task);
        dprint_status(task);
+
        switch (task->tk_status) {
        case 0:
                /* Success */
@@ -2091,8 +2205,14 @@ call_bc_transmit(struct rpc_task *task)
        case -ENOTCONN:
        case -EPIPE:
                break;
+       case -ENOBUFS:
+               rpc_delay(task, HZ>>2);
+               /* fall through */
+       case -EBADSLT:
        case -EAGAIN:
-               goto out_retry;
+               task->tk_status = 0;
+               task->tk_action = call_bc_transmit;
+               return;
        case -ETIMEDOUT:
                /*
                 * Problem reaching the server.  Disconnect and let the
@@ -2111,18 +2231,11 @@ call_bc_transmit(struct rpc_task *task)
                 * We were unable to reply and will have to drop the
                 * request.  The server should reconnect and retransmit.
                 */
-               WARN_ON_ONCE(task->tk_status == -EAGAIN);
                printk(KERN_NOTICE "RPC: Could not send backchannel reply "
                        "error: %d\n", task->tk_status);
                break;
        }
-out_wakeup:
-       rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
-out_done:
        task->tk_action = rpc_exit_task;
-       return;
-out_retry:
-       task->tk_status = 0;
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
@@ -2143,6 +2256,7 @@ call_status(struct rpc_task *task)
        status = task->tk_status;
        if (status >= 0) {
                task->tk_action = call_decode;
+               call_decode(task);
                return;
        }
 
@@ -2154,10 +2268,8 @@ call_status(struct rpc_task *task)
        case -EHOSTUNREACH:
        case -ENETUNREACH:
        case -EPERM:
-               if (RPC_IS_SOFTCONN(task)) {
-                       rpc_exit(task, status);
-                       break;
-               }
+               if (RPC_IS_SOFTCONN(task))
+                       goto out_exit;
                /*
                 * Delay any retries for 3 seconds, then handle as if it
                 * were a timeout.
@@ -2165,7 +2277,6 @@ call_status(struct rpc_task *task)
                rpc_delay(task, 3*HZ);
                /* fall through */
        case -ETIMEDOUT:
-               task->tk_action = call_timeout;
                break;
        case -ECONNREFUSED:
        case -ECONNRESET:
@@ -2178,34 +2289,30 @@ call_status(struct rpc_task *task)
        case -EPIPE:
        case -ENOTCONN:
        case -EAGAIN:
-               task->tk_action = call_encode;
                break;
        case -EIO:
                /* shutdown or soft timeout */
-               rpc_exit(task, status);
-               break;
+               goto out_exit;
        default:
                if (clnt->cl_chatty)
                        printk("%s: RPC call returned error %d\n",
                               clnt->cl_program->name, -status);
-               rpc_exit(task, status);
+               goto out_exit;
        }
+       task->tk_action = call_encode;
+       rpc_check_timeout(task);
+       return;
+out_exit:
+       rpc_exit(task, status);
 }
 
-/*
- * 6a. Handle RPC timeout
- *     We do not release the request slot, so we keep using the
- *     same XID for all retransmits.
- */
 static void
-call_timeout(struct rpc_task *task)
+rpc_check_timeout(struct rpc_task *task)
 {
        struct rpc_clnt *clnt = task->tk_client;
 
-       if (xprt_adjust_timeout(task->tk_rqstp) == 0) {
-               dprintk("RPC: %5u call_timeout (minor)\n", task->tk_pid);
-               goto retry;
-       }
+       if (xprt_adjust_timeout(task->tk_rqstp) == 0)
+               return;
 
        dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid);
        task->tk_timeouts++;
@@ -2241,10 +2348,6 @@ call_timeout(struct rpc_task *task)
         * event? RFC2203 requires the server to drop all such requests.
         */
        rpcauth_invalcred(task);
-
-retry:
-       task->tk_action = call_encode;
-       task->tk_status = 0;
 }
 
 /*
@@ -2255,12 +2358,11 @@ call_decode(struct rpc_task *task)
 {
        struct rpc_clnt *clnt = task->tk_client;
        struct rpc_rqst *req = task->tk_rqstp;
-       kxdrdproc_t     decode = task->tk_msg.rpc_proc->p_decode;
-       __be32          *p;
+       struct xdr_stream xdr;
 
        dprint_status(task);
 
-       if (!decode) {
+       if (!task->tk_msg.rpc_proc->p_decode) {
                task->tk_action = rpc_exit_task;
                return;
        }
@@ -2285,223 +2387,195 @@ call_decode(struct rpc_task *task)
        WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
                                sizeof(req->rq_rcv_buf)) != 0);
 
-       if (req->rq_rcv_buf.len < 12) {
-               if (!RPC_IS_SOFT(task)) {
-                       task->tk_action = call_encode;
-                       goto out_retry;
-               }
-               dprintk("RPC:       %s: too small RPC reply size (%d bytes)\n",
-                               clnt->cl_program->name, task->tk_status);
-               task->tk_action = call_timeout;
+       if (req->rq_rcv_buf.len < 12)
                goto out_retry;
-       }
 
-       p = rpc_verify_header(task);
-       if (IS_ERR(p)) {
-               if (p == ERR_PTR(-EAGAIN))
-                       goto out_retry;
+       xdr_init_decode(&xdr, &req->rq_rcv_buf,
+                       req->rq_rcv_buf.head[0].iov_base, req);
+       switch (rpc_decode_header(task, &xdr)) {
+       case 0:
+               task->tk_action = rpc_exit_task;
+               task->tk_status = rpcauth_unwrap_resp(task, &xdr);
+               dprintk("RPC: %5u %s result %d\n",
+                       task->tk_pid, __func__, task->tk_status);
                return;
-       }
-       task->tk_action = rpc_exit_task;
-
-       task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
-                                             task->tk_msg.rpc_resp);
-
-       dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
-                       task->tk_status);
-       return;
+       case -EAGAIN:
 out_retry:
-       task->tk_status = 0;
-       /* Note: rpc_verify_header() may have freed the RPC slot */
-       if (task->tk_rqstp == req) {
-               xdr_free_bvec(&req->rq_rcv_buf);
-               req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
-               if (task->tk_client->cl_discrtry)
-                       xprt_conditional_disconnect(req->rq_xprt,
-                                       req->rq_connect_cookie);
+               task->tk_status = 0;
+               /* Note: rpc_decode_header() may have freed the RPC slot */
+               if (task->tk_rqstp == req) {
+                       xdr_free_bvec(&req->rq_rcv_buf);
+                       req->rq_reply_bytes_recvd = 0;
+                       req->rq_rcv_buf.len = 0;
+                       if (task->tk_client->cl_discrtry)
+                               xprt_conditional_disconnect(req->rq_xprt,
+                                                           req->rq_connect_cookie);
+               }
+               task->tk_action = call_encode;
+               rpc_check_timeout(task);
        }
 }
 
-static __be32 *
-rpc_encode_header(struct rpc_task *task)
+static int
+rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr)
 {
        struct rpc_clnt *clnt = task->tk_client;
        struct rpc_rqst *req = task->tk_rqstp;
-       __be32          *p = req->rq_svec[0].iov_base;
-
-       /* FIXME: check buffer size? */
-
-       p = xprt_skip_transport_header(req->rq_xprt, p);
-       *p++ = req->rq_xid;             /* XID */
-       *p++ = htonl(RPC_CALL);         /* CALL */
-       *p++ = htonl(RPC_VERSION);      /* RPC version */
-       *p++ = htonl(clnt->cl_prog);    /* program number */
-       *p++ = htonl(clnt->cl_vers);    /* program version */
-       *p++ = htonl(task->tk_msg.rpc_proc->p_proc);    /* procedure */
-       p = rpcauth_marshcred(task, p);
-       if (p)
-               req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p);
-       return p;
+       __be32 *p;
+       int error;
+
+       error = -EMSGSIZE;
+       p = xdr_reserve_space(xdr, RPC_CALLHDRSIZE << 2);
+       if (!p)
+               goto out_fail;
+       *p++ = req->rq_xid;
+       *p++ = rpc_call;
+       *p++ = cpu_to_be32(RPC_VERSION);
+       *p++ = cpu_to_be32(clnt->cl_prog);
+       *p++ = cpu_to_be32(clnt->cl_vers);
+       *p   = cpu_to_be32(task->tk_msg.rpc_proc->p_proc);
+
+       error = rpcauth_marshcred(task, xdr);
+       if (error < 0)
+               goto out_fail;
+       return 0;
+out_fail:
+       trace_rpc_bad_callhdr(task);
+       rpc_exit(task, error);
+       return error;
 }
 
-static __be32 *
-rpc_verify_header(struct rpc_task *task)
+static noinline int
+rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
 {
        struct rpc_clnt *clnt = task->tk_client;
-       struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
-       int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
-       __be32  *p = iov->iov_base;
-       u32 n;
        int error = -EACCES;
+       __be32 *p;
 
-       if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) {
-               /* RFC-1014 says that the representation of XDR data must be a
-                * multiple of four bytes
-                * - if it isn't pointer subtraction in the NFS client may give
-                *   undefined results
-                */
-               dprintk("RPC: %5u %s: XDR representation not a multiple of"
-                      " 4 bytes: 0x%x\n", task->tk_pid, __func__,
-                      task->tk_rqstp->rq_rcv_buf.len);
-               error = -EIO;
-               goto out_err;
-       }
-       if ((len -= 3) < 0)
-               goto out_overflow;
-
-       p += 1; /* skip XID */
-       if ((n = ntohl(*p++)) != RPC_REPLY) {
-               dprintk("RPC: %5u %s: not an RPC reply: %x\n",
-                       task->tk_pid, __func__, n);
-               error = -EIO;
-               goto out_garbage;
-       }
-
-       if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
-               if (--len < 0)
-                       goto out_overflow;
-               switch ((n = ntohl(*p++))) {
-               case RPC_AUTH_ERROR:
-                       break;
-               case RPC_MISMATCH:
-                       dprintk("RPC: %5u %s: RPC call version mismatch!\n",
-                               task->tk_pid, __func__);
-                       error = -EPROTONOSUPPORT;
-                       goto out_err;
-               default:
-                       dprintk("RPC: %5u %s: RPC call rejected, "
-                               "unknown error: %x\n",
-                               task->tk_pid, __func__, n);
-                       error = -EIO;
-                       goto out_err;
-               }
-               if (--len < 0)
-                       goto out_overflow;
-               switch ((n = ntohl(*p++))) {
-               case RPC_AUTH_REJECTEDCRED:
-               case RPC_AUTH_REJECTEDVERF:
-               case RPCSEC_GSS_CREDPROBLEM:
-               case RPCSEC_GSS_CTXPROBLEM:
-                       if (!task->tk_cred_retry)
-                               break;
-                       task->tk_cred_retry--;
-                       dprintk("RPC: %5u %s: retry stale creds\n",
-                                       task->tk_pid, __func__);
-                       rpcauth_invalcred(task);
-                       /* Ensure we obtain a new XID! */
-                       xprt_release(task);
-                       task->tk_action = call_reserve;
-                       goto out_retry;
-               case RPC_AUTH_BADCRED:
-               case RPC_AUTH_BADVERF:
-                       /* possibly garbled cred/verf? */
-                       if (!task->tk_garb_retry)
-                               break;
-                       task->tk_garb_retry--;
-                       dprintk("RPC: %5u %s: retry garbled creds\n",
-                                       task->tk_pid, __func__);
-                       task->tk_action = call_encode;
-                       goto out_retry;
-               case RPC_AUTH_TOOWEAK:
-                       printk(KERN_NOTICE "RPC: server %s requires stronger "
-                              "authentication.\n",
-                              task->tk_xprt->servername);
-                       break;
-               default:
-                       dprintk("RPC: %5u %s: unknown auth error: %x\n",
-                                       task->tk_pid, __func__, n);
-                       error = -EIO;
-               }
-               dprintk("RPC: %5u %s: call rejected %d\n",
-                               task->tk_pid, __func__, n);
-               goto out_err;
-       }
-       p = rpcauth_checkverf(task, p);
-       if (IS_ERR(p)) {
-               error = PTR_ERR(p);
-               dprintk("RPC: %5u %s: auth check failed with %d\n",
-                               task->tk_pid, __func__, error);
-               goto out_garbage;               /* bad verifier, retry */
-       }
-       len = p - (__be32 *)iov->iov_base - 1;
-       if (len < 0)
-               goto out_overflow;
-       switch ((n = ntohl(*p++))) {
-       case RPC_SUCCESS:
-               return p;
-       case RPC_PROG_UNAVAIL:
-               dprintk("RPC: %5u %s: program %u is unsupported "
-                               "by server %s\n", task->tk_pid, __func__,
-                               (unsigned int)clnt->cl_prog,
-                               task->tk_xprt->servername);
+       /* RFC-1014 says that the representation of XDR data must be a
+        * multiple of four bytes
+        * - if it isn't pointer subtraction in the NFS client may give
+        *   undefined results
+        */
+       if (task->tk_rqstp->rq_rcv_buf.len & 3)
+               goto out_badlen;
+
+       p = xdr_inline_decode(xdr, 3 * sizeof(*p));
+       if (!p)
+               goto out_unparsable;
+       p++;    /* skip XID */
+       if (*p++ != rpc_reply)
+               goto out_unparsable;
+       if (*p++ != rpc_msg_accepted)
+               goto out_msg_denied;
+
+       error = rpcauth_checkverf(task, xdr);
+       if (error)
+               goto out_verifier;
+
+       p = xdr_inline_decode(xdr, sizeof(*p));
+       if (!p)
+               goto out_unparsable;
+       switch (*p) {
+       case rpc_success:
+               return 0;
+       case rpc_prog_unavail:
+               trace_rpc__prog_unavail(task);
                error = -EPFNOSUPPORT;
                goto out_err;
-       case RPC_PROG_MISMATCH:
-               dprintk("RPC: %5u %s: program %u, version %u unsupported "
-                               "by server %s\n", task->tk_pid, __func__,
-                               (unsigned int)clnt->cl_prog,
-                               (unsigned int)clnt->cl_vers,
-                               task->tk_xprt->servername);
+       case rpc_prog_mismatch:
+               trace_rpc__prog_mismatch(task);
                error = -EPROTONOSUPPORT;
                goto out_err;
-       case RPC_PROC_UNAVAIL:
-               dprintk("RPC: %5u %s: proc %s unsupported by program %u, "
-                               "version %u on server %s\n",
-                               task->tk_pid, __func__,
-                               rpc_proc_name(task),
-                               clnt->cl_prog, clnt->cl_vers,
-                               task->tk_xprt->servername);
+       case rpc_proc_unavail:
+               trace_rpc__proc_unavail(task);
                error = -EOPNOTSUPP;
                goto out_err;
-       case RPC_GARBAGE_ARGS:
-               dprintk("RPC: %5u %s: server saw garbage\n",
-                               task->tk_pid, __func__);
-               break;                  /* retry */
+       case rpc_garbage_args:
+               trace_rpc__garbage_args(task);
+               break;
        default:
-               dprintk("RPC: %5u %s: server accept status: %x\n",
-                               task->tk_pid, __func__, n);
-               /* Also retry */
+               trace_rpc__unparsable(task);
        }
 
 out_garbage:
        clnt->cl_stats->rpcgarbage++;
        if (task->tk_garb_retry) {
                task->tk_garb_retry--;
-               dprintk("RPC: %5u %s: retrying\n",
-                               task->tk_pid, __func__);
                task->tk_action = call_encode;
-out_retry:
-               return ERR_PTR(-EAGAIN);
+               return -EAGAIN;
        }
 out_err:
        rpc_exit(task, error);
-       dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
-                       __func__, error);
-       return ERR_PTR(error);
-out_overflow:
-       dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid,
-                       __func__);
+       return error;
+
+out_badlen:
+       trace_rpc__unparsable(task);
+       error = -EIO;
+       goto out_err;
+
+out_unparsable:
+       trace_rpc__unparsable(task);
+       error = -EIO;
+       goto out_garbage;
+
+out_verifier:
+       trace_rpc_bad_verifier(task);
        goto out_garbage;
+
+out_msg_denied:
+       p = xdr_inline_decode(xdr, sizeof(*p));
+       if (!p)
+               goto out_unparsable;
+       switch (*p++) {
+       case rpc_auth_error:
+               break;
+       case rpc_mismatch:
+               trace_rpc__mismatch(task);
+               error = -EPROTONOSUPPORT;
+               goto out_err;
+       default:
+               trace_rpc__unparsable(task);
+               error = -EIO;
+               goto out_err;
+       }
+
+       p = xdr_inline_decode(xdr, sizeof(*p));
+       if (!p)
+               goto out_unparsable;
+       switch (*p++) {
+       case rpc_autherr_rejectedcred:
+       case rpc_autherr_rejectedverf:
+       case rpcsec_gsserr_credproblem:
+       case rpcsec_gsserr_ctxproblem:
+               if (!task->tk_cred_retry)
+                       break;
+               task->tk_cred_retry--;
+               trace_rpc__stale_creds(task);
+               rpcauth_invalcred(task);
+               /* Ensure we obtain a new XID! */
+               xprt_release(task);
+               task->tk_action = call_reserve;
+               return -EAGAIN;
+       case rpc_autherr_badcred:
+       case rpc_autherr_badverf:
+               /* possibly garbled cred/verf? */
+               if (!task->tk_garb_retry)
+                       break;
+               task->tk_garb_retry--;
+               trace_rpc__bad_creds(task);
+               task->tk_action = call_encode;
+               return -EAGAIN;
+       case rpc_autherr_tooweak:
+               trace_rpc__auth_tooweak(task);
+               pr_warn("RPC: server %s requires stronger authentication.\n",
+                       task->tk_xprt->servername);
+               break;
+       default:
+               trace_rpc__unparsable(task);
+               error = -EIO;
+       }
+       goto out_err;
 }
 
 static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
index adc3c40cc733ca12b4424aefd4635e9f290f7433..28956c70100af03916fdcf19a520e7c784d1ba3b 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
 #include <linux/freezer.h>
+#include <linux/sched/mm.h>
 
 #include <linux/sunrpc/clnt.h>
 
@@ -784,8 +785,7 @@ void rpc_exit(struct rpc_task *task, int status)
 {
        task->tk_status = status;
        task->tk_action = rpc_exit_task;
-       if (RPC_IS_QUEUED(task))
-               rpc_wake_up_queued_task(task->tk_waitqueue, task);
+       rpc_wake_up_queued_task(task->tk_waitqueue, task);
 }
 EXPORT_SYMBOL_GPL(rpc_exit);
 
@@ -902,7 +902,10 @@ void rpc_execute(struct rpc_task *task)
 
 static void rpc_async_schedule(struct work_struct *work)
 {
+       unsigned int pflags = memalloc_nofs_save();
+
        __rpc_execute(container_of(work, struct rpc_task, u.tk_work));
+       memalloc_nofs_restore(pflags);
 }
 
 /**
@@ -921,16 +924,13 @@ static void rpc_async_schedule(struct work_struct *work)
  * Most requests are 'small' (under 2KiB) and can be serviced from a
  * mempool, ensuring that NFS reads and writes can always proceed,
  * and that there is good locality of reference for these buffers.
- *
- * In order to avoid memory starvation triggering more writebacks of
- * NFS requests, we avoid using GFP_KERNEL.
  */
 int rpc_malloc(struct rpc_task *task)
 {
        struct rpc_rqst *rqst = task->tk_rqstp;
        size_t size = rqst->rq_callsize + rqst->rq_rcvsize;
        struct rpc_buffer *buf;
-       gfp_t gfp = GFP_NOIO | __GFP_NOWARN;
+       gfp_t gfp = GFP_NOFS;
 
        if (RPC_IS_SWAPPER(task))
                gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
@@ -1011,7 +1011,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
 static struct rpc_task *
 rpc_alloc_task(void)
 {
-       return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
+       return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
 }
 
 /*
@@ -1067,7 +1067,10 @@ static void rpc_free_task(struct rpc_task *task)
 
 static void rpc_async_release(struct work_struct *work)
 {
+       unsigned int pflags = memalloc_nofs_save();
+
        rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
+       memalloc_nofs_restore(pflags);
 }
 
 static void rpc_release_resources_task(struct rpc_task *task)
index e87ddb9f7feb1ce6385cec4d01e1f8270511d77e..dbd19697ee38e40d5670e31d2b0392beb66aea2e 100644 (file)
@@ -1144,17 +1144,6 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
 static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
 #endif
 
-/*
- * Setup response header for TCP, it has a 4B record length field.
- */
-static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
-{
-       struct kvec *resv = &rqstp->rq_res.head[0];
-
-       /* tcp needs a space for the record length... */
-       svc_putnl(resv, 0);
-}
-
 /*
  * Common routine for processing the RPC request.
  */
@@ -1182,10 +1171,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
        set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
        clear_bit(RQ_DROPME, &rqstp->rq_flags);
 
-       /* Setup reply header */
-       if (rqstp->rq_prot == IPPROTO_TCP)
-               svc_tcp_prep_reply_hdr(rqstp);
-
        svc_putu32(resv, rqstp->rq_xid);
 
        vers = svc_getnl(argv);
@@ -1443,6 +1428,10 @@ svc_process(struct svc_rqst *rqstp)
                goto out_drop;
        }
 
+       /* Reserve space for the record marker */
+       if (rqstp->rq_prot == IPPROTO_TCP)
+               svc_putnl(resv, 0);
+
        /* Returns 1 for send, 0 for drop */
        if (likely(svc_process_common(rqstp, argv, resv)))
                return svc_send(rqstp);
index f302c6eb8779063a71b9a590325a96b8026ab3e6..aa8177ddcbda81e157fd886f2696da1dd8186c60 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/bvec.h>
+#include <trace/events/sunrpc.h>
 
 /*
  * XDR functions for basic NFS types
@@ -162,6 +163,15 @@ xdr_free_bvec(struct xdr_buf *buf)
        buf->bvec = NULL;
 }
 
+/**
+ * xdr_inline_pages - Prepare receive buffer for a large reply
+ * @xdr: xdr_buf into which reply will be placed
+ * @offset: expected offset where data payload will start, in bytes
+ * @pages: vector of struct page pointers
+ * @base: offset in first page where receive should start, in bytes
+ * @len: expected size of the upper layer data payload, in bytes
+ *
+ */
 void
 xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
                 struct page **pages, unsigned int base, unsigned int len)
@@ -179,6 +189,8 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
 
        tail->iov_base = buf + offset;
        tail->iov_len = buflen - offset;
+       if ((xdr->page_len & 3) == 0)
+               tail->iov_len -= sizeof(__be32);
 
        xdr->buflen += len;
 }
@@ -346,13 +358,15 @@ EXPORT_SYMBOL_GPL(_copy_from_pages);
  * 'len' bytes. The extra data is not lost, but is instead
  * moved into the inlined pages and/or the tail.
  */
-static void
+static unsigned int
 xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
 {
        struct kvec *head, *tail;
        size_t copy, offs;
        unsigned int pglen = buf->page_len;
+       unsigned int result;
 
+       result = 0;
        tail = buf->tail;
        head = buf->head;
 
@@ -366,6 +380,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
                        copy = tail->iov_len - len;
                        memmove((char *)tail->iov_base + len,
                                        tail->iov_base, copy);
+                       result += copy;
                }
                /* Copy from the inlined pages into the tail */
                copy = len;
@@ -376,11 +391,13 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
                        copy = 0;
                else if (copy > tail->iov_len - offs)
                        copy = tail->iov_len - offs;
-               if (copy != 0)
+               if (copy != 0) {
                        _copy_from_pages((char *)tail->iov_base + offs,
                                        buf->pages,
                                        buf->page_base + pglen + offs - len,
                                        copy);
+                       result += copy;
+               }
                /* Do we also need to copy data from the head into the tail ? */
                if (len > pglen) {
                        offs = copy = len - pglen;
@@ -390,6 +407,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
                                        (char *)head->iov_base +
                                        head->iov_len - offs,
                                        copy);
+                       result += copy;
                }
        }
        /* Now handle pages */
@@ -405,12 +423,15 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
                _copy_to_pages(buf->pages, buf->page_base,
                                (char *)head->iov_base + head->iov_len - len,
                                copy);
+               result += copy;
        }
        head->iov_len -= len;
        buf->buflen -= len;
        /* Have we truncated the message? */
        if (buf->len > buf->buflen)
                buf->len = buf->buflen;
+
+       return result;
 }
 
 /**
@@ -422,14 +443,16 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
  * 'len' bytes. The extra data is not lost, but is instead
  * moved into the tail.
  */
-static void
+static unsigned int
 xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
 {
        struct kvec *tail;
        size_t copy;
        unsigned int pglen = buf->page_len;
        unsigned int tailbuf_len;
+       unsigned int result;
 
+       result = 0;
        tail = buf->tail;
        BUG_ON (len > pglen);
 
@@ -447,18 +470,22 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
                if (tail->iov_len > len) {
                        char *p = (char *)tail->iov_base + len;
                        memmove(p, tail->iov_base, tail->iov_len - len);
+                       result += tail->iov_len - len;
                } else
                        copy = tail->iov_len;
                /* Copy from the inlined pages into the tail */
                _copy_from_pages((char *)tail->iov_base,
                                buf->pages, buf->page_base + pglen - len,
                                copy);
+               result += copy;
        }
        buf->page_len -= len;
        buf->buflen -= len;
        /* Have we truncated the message? */
        if (buf->len > buf->buflen)
                buf->len = buf->buflen;
+
+       return result;
 }
 
 void
@@ -483,6 +510,7 @@ EXPORT_SYMBOL_GPL(xdr_stream_pos);
  * @xdr: pointer to xdr_stream struct
  * @buf: pointer to XDR buffer in which to encode data
  * @p: current pointer inside XDR buffer
+ * @rqst: pointer to controlling rpc_rqst, for debugging
  *
  * Note: at the moment the RPC client only passes the length of our
  *      scratch buffer in the xdr_buf's header kvec. Previously this
@@ -491,7 +519,8 @@ EXPORT_SYMBOL_GPL(xdr_stream_pos);
  *      of the buffer length, and takes care of adjusting the kvec
  *      length for us.
  */
-void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
+void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+                    struct rpc_rqst *rqst)
 {
        struct kvec *iov = buf->head;
        int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
@@ -513,6 +542,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
                buf->len += len;
                iov->iov_len += len;
        }
+       xdr->rqst = rqst;
 }
 EXPORT_SYMBOL_GPL(xdr_init_encode);
 
@@ -551,9 +581,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
        int frag1bytes, frag2bytes;
 
        if (nbytes > PAGE_SIZE)
-               return NULL; /* Bigger buffers require special handling */
+               goto out_overflow; /* Bigger buffers require special handling */
        if (xdr->buf->len + nbytes > xdr->buf->buflen)
-               return NULL; /* Sorry, we're totally out of space */
+               goto out_overflow; /* Sorry, we're totally out of space */
        frag1bytes = (xdr->end - xdr->p) << 2;
        frag2bytes = nbytes - frag1bytes;
        if (xdr->iov)
@@ -582,6 +612,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
        xdr->buf->page_len += frag2bytes;
        xdr->buf->len += nbytes;
        return p;
+out_overflow:
+       trace_rpc_xdr_overflow(xdr, nbytes);
+       return NULL;
 }
 
 /**
@@ -819,8 +852,10 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr)
  * @xdr: pointer to xdr_stream struct
  * @buf: pointer to XDR buffer from which to decode data
  * @p: current pointer inside XDR buffer
+ * @rqst: pointer to controlling rpc_rqst, for debugging
  */
-void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
+void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+                    struct rpc_rqst *rqst)
 {
        xdr->buf = buf;
        xdr->scratch.iov_base = NULL;
@@ -836,6 +871,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
                xdr->nwords -= p - xdr->p;
                xdr->p = p;
        }
+       xdr->rqst = rqst;
 }
 EXPORT_SYMBOL_GPL(xdr_init_decode);
 
@@ -854,7 +890,7 @@ void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
        buf->page_len =  len;
        buf->buflen =  len;
        buf->len = len;
-       xdr_init_decode(xdr, buf, NULL);
+       xdr_init_decode(xdr, buf, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
 
@@ -896,20 +932,23 @@ static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
        size_t cplen = (char *)xdr->end - (char *)xdr->p;
 
        if (nbytes > xdr->scratch.iov_len)
-               return NULL;
+               goto out_overflow;
        p = __xdr_inline_decode(xdr, cplen);
        if (p == NULL)
                return NULL;
        memcpy(cpdest, p, cplen);
+       if (!xdr_set_next_buffer(xdr))
+               goto out_overflow;
        cpdest += cplen;
        nbytes -= cplen;
-       if (!xdr_set_next_buffer(xdr))
-               return NULL;
        p = __xdr_inline_decode(xdr, nbytes);
        if (p == NULL)
                return NULL;
        memcpy(cpdest, p, nbytes);
        return xdr->scratch.iov_base;
+out_overflow:
+       trace_rpc_xdr_overflow(xdr, nbytes);
+       return NULL;
 }
 
 /**
@@ -926,14 +965,17 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
 {
        __be32 *p;
 
-       if (nbytes == 0)
+       if (unlikely(nbytes == 0))
                return xdr->p;
        if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
-               return NULL;
+               goto out_overflow;
        p = __xdr_inline_decode(xdr, nbytes);
        if (p != NULL)
                return p;
        return xdr_copy_to_scratch(xdr, nbytes);
+out_overflow:
+       trace_rpc_xdr_overflow(xdr, nbytes);
+       return NULL;
 }
 EXPORT_SYMBOL_GPL(xdr_inline_decode);
 
@@ -943,13 +985,17 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
        struct kvec *iov;
        unsigned int nwords = XDR_QUADLEN(len);
        unsigned int cur = xdr_stream_pos(xdr);
+       unsigned int copied, offset;
 
        if (xdr->nwords == 0)
                return 0;
+
        /* Realign pages to current pointer position */
-       iov  = buf->head;
+       iov = buf->head;
        if (iov->iov_len > cur) {
-               xdr_shrink_bufhead(buf, iov->iov_len - cur);
+               offset = iov->iov_len - cur;
+               copied = xdr_shrink_bufhead(buf, offset);
+               trace_rpc_xdr_alignment(xdr, offset, copied);
                xdr->nwords = XDR_QUADLEN(buf->len - cur);
        }
 
@@ -961,7 +1007,9 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
                len = buf->page_len;
        else if (nwords < xdr->nwords) {
                /* Truncate page data and move it into the tail */
-               xdr_shrink_pagelen(buf, buf->page_len - len);
+               offset = buf->page_len - len;
+               copied = xdr_shrink_pagelen(buf, offset);
+               trace_rpc_xdr_alignment(xdr, offset, copied);
                xdr->nwords = XDR_QUADLEN(buf->len - cur);
        }
        return len;
@@ -1102,47 +1150,6 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
 }
 EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
 
-/**
- * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
- * @buf: buf to be trimmed
- * @len: number of bytes to reduce "buf" by
- *
- * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note
- * that it's possible that we'll trim less than that amount if the xdr_buf is
- * too small, or if (for instance) it's all in the head and the parser has
- * already read too far into it.
- */
-void xdr_buf_trim(struct xdr_buf *buf, unsigned int len)
-{
-       size_t cur;
-       unsigned int trim = len;
-
-       if (buf->tail[0].iov_len) {
-               cur = min_t(size_t, buf->tail[0].iov_len, trim);
-               buf->tail[0].iov_len -= cur;
-               trim -= cur;
-               if (!trim)
-                       goto fix_len;
-       }
-
-       if (buf->page_len) {
-               cur = min_t(unsigned int, buf->page_len, trim);
-               buf->page_len -= cur;
-               trim -= cur;
-               if (!trim)
-                       goto fix_len;
-       }
-
-       if (buf->head[0].iov_len) {
-               cur = min_t(size_t, buf->head[0].iov_len, trim);
-               buf->head[0].iov_len -= cur;
-               trim -= cur;
-       }
-fix_len:
-       buf->len -= (len - trim);
-}
-EXPORT_SYMBOL_GPL(xdr_buf_trim);
-
 static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
 {
        unsigned int this_len;
index f1ec2110efebe6315523036bd11e30dbdf7c8de4..e096c5a725dff123a347741bffcfdef13227218f 100644 (file)
@@ -49,6 +49,7 @@
 #include <linux/sunrpc/metrics.h>
 #include <linux/sunrpc/bc_xprt.h>
 #include <linux/rcupdate.h>
+#include <linux/sched/mm.h>
 
 #include <trace/events/sunrpc.h>
 
@@ -643,11 +644,13 @@ static void xprt_autoclose(struct work_struct *work)
 {
        struct rpc_xprt *xprt =
                container_of(work, struct rpc_xprt, task_cleanup);
+       unsigned int pflags = memalloc_nofs_save();
 
        clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
        xprt->ops->close(xprt);
        xprt_release_write(xprt, NULL);
        wake_up_bit(&xprt->state, XPRT_LOCKED);
+       memalloc_nofs_restore(pflags);
 }
 
 /**
@@ -1165,6 +1168,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
                                /* Note: req is added _before_ pos */
                                list_add_tail(&req->rq_xmit, &pos->rq_xmit);
                                INIT_LIST_HEAD(&req->rq_xmit2);
+                               trace_xprt_enq_xmit(task, 1);
                                goto out;
                        }
                } else if (RPC_IS_SWAPPER(task)) {
@@ -1176,6 +1180,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
                                /* Note: req is added _before_ pos */
                                list_add_tail(&req->rq_xmit, &pos->rq_xmit);
                                INIT_LIST_HEAD(&req->rq_xmit2);
+                               trace_xprt_enq_xmit(task, 2);
                                goto out;
                        }
                } else if (!req->rq_seqno) {
@@ -1184,11 +1189,13 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
                                        continue;
                                list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
                                INIT_LIST_HEAD(&req->rq_xmit);
+                               trace_xprt_enq_xmit(task, 3);
                                goto out;
                        }
                }
                list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
                INIT_LIST_HEAD(&req->rq_xmit2);
+               trace_xprt_enq_xmit(task, 4);
 out:
                set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
                spin_unlock(&xprt->queue_lock);
@@ -1313,8 +1320,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
        int is_retrans = RPC_WAS_SENT(task);
        int status;
 
-       dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
-
        if (!req->rq_bytes_sent) {
                if (xprt_request_data_received(task)) {
                        status = 0;
@@ -1325,6 +1330,13 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
                        status = -EBADMSG;
                        goto out_dequeue;
                }
+               if (task->tk_ops->rpc_call_prepare_transmit) {
+                       task->tk_ops->rpc_call_prepare_transmit(task,
+                                       task->tk_calldata);
+                       status = task->tk_status;
+                       if (status < 0)
+                               goto out_dequeue;
+               }
        }
 
        /*
@@ -1336,9 +1348,9 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
 
        connect_cookie = xprt->connect_cookie;
        status = xprt->ops->send_request(req);
-       trace_xprt_transmit(xprt, req->rq_xid, status);
        if (status != 0) {
                req->rq_ntrans--;
+               trace_xprt_transmit(req, status);
                return status;
        }
 
@@ -1347,7 +1359,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
 
        xprt_inject_disconnect(xprt);
 
-       dprintk("RPC: %5u xmit complete\n", task->tk_pid);
        task->tk_flags |= RPC_TASK_SENT;
        spin_lock_bh(&xprt->transport_lock);
 
@@ -1360,6 +1371,7 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
 
        req->rq_connect_cookie = connect_cookie;
 out_dequeue:
+       trace_xprt_transmit(req, status);
        xprt_request_dequeue_transmit(task);
        rpc_wake_up_queued_task_set_status(&xprt->sending, task, status);
        return status;
@@ -1599,7 +1611,6 @@ xprt_request_init(struct rpc_task *task)
        req->rq_buffer  = NULL;
        req->rq_xid     = xprt_alloc_xid(xprt);
        xprt_init_connect_cookie(req, xprt);
-       req->rq_bytes_sent = 0;
        req->rq_snd_buf.len = 0;
        req->rq_snd_buf.buflen = 0;
        req->rq_rcv_buf.len = 0;
@@ -1721,6 +1732,7 @@ void xprt_release(struct rpc_task *task)
                xprt->ops->buf_free(task);
        xprt_inject_disconnect(xprt);
        xdr_free_bvec(&req->rq_rcv_buf);
+       xdr_free_bvec(&req->rq_snd_buf);
        if (req->rq_cred != NULL)
                put_rpccred(req->rq_cred);
        task->tk_rqstp = NULL;
@@ -1749,7 +1761,6 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
         */
        xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
                xbufp->tail[0].iov_len;
-       req->rq_bytes_sent = 0;
 }
 #endif
 
index 0de9b3e63770f21b0827e035349d5819fb1ab964..d79b18c1f4cd8b3ba140a1c3eac6ea8016004aee 100644 (file)
@@ -123,7 +123,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
 
        rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
        xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
-                       req->rl_rdmabuf->rg_base);
+                       req->rl_rdmabuf->rg_base, rqst);
 
        p = xdr_reserve_space(&req->rl_stream, 28);
        if (unlikely(!p))
@@ -267,7 +267,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 
        /* Prepare rqst */
        rqst->rq_reply_bytes_recvd = 0;
-       rqst->rq_bytes_sent = 0;
        rqst->rq_xid = *p;
 
        rqst->rq_private_buf.len = size;
index 6a561056b53803a3179def1be4e2371f082aabfb..52cb6c1b0c2bc0c3ce84ec485fd1657c3dbd24b4 100644 (file)
@@ -391,7 +391,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
  */
 struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
                                struct rpcrdma_mr_seg *seg,
-                               int nsegs, bool writing, u32 xid,
+                               int nsegs, bool writing, __be32 xid,
                                struct rpcrdma_mr **out)
 {
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -446,7 +446,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
                goto out_mapmr_err;
 
        ibmr->iova &= 0x00000000ffffffff;
-       ibmr->iova |= ((u64)cpu_to_be32(xid)) << 32;
+       ibmr->iova |= ((u64)be32_to_cpu(xid)) << 32;
        key = (u8)(ibmr->rkey & 0x000000FF);
        ib_update_fast_reg_key(ibmr, ++key);
 
index d18614e02b4e8d5e31e00795397b64cc7bfc0463..6c1fb270f12763f2b67bab6548c8410bd8e4cee2 100644 (file)
@@ -164,6 +164,21 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
        return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
 }
 
+/* The client is required to provide a Reply chunk if the maximum
+ * size of the non-payload part of the RPC Reply is larger than
+ * the inline threshold.
+ */
+static bool
+rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
+                         const struct rpc_rqst *rqst)
+{
+       const struct xdr_buf *buf = &rqst->rq_rcv_buf;
+       const struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+       return buf->head[0].iov_len + buf->tail[0].iov_len <
+               ia->ri_max_inline_read;
+}
+
 /* Split @vec on page boundaries into SGEs. FMR registers pages, not
  * a byte range. Other modes coalesce these SGEs into a single MR
  * when they can.
@@ -733,7 +748,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
 
        rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
        xdr_init_encode(xdr, &req->rl_hdrbuf,
-                       req->rl_rdmabuf->rg_base);
+                       req->rl_rdmabuf->rg_base, rqst);
 
        /* Fixed header fields */
        ret = -EMSGSIZE;
@@ -762,7 +777,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
         */
        if (rpcrdma_results_inline(r_xprt, rqst))
                wtype = rpcrdma_noch;
-       else if (ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ)
+       else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
+                rpcrdma_nonpayload_inline(r_xprt, rqst))
                wtype = rpcrdma_writech;
        else
                wtype = rpcrdma_replych;
@@ -1313,7 +1329,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 
        /* Fixed transport header fields */
        xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
-                       rep->rr_hdrbuf.head[0].iov_base);
+                       rep->rr_hdrbuf.head[0].iov_base, NULL);
        p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
        if (unlikely(!p))
                goto out_shortreply;
index b908f2ca08fd4d99f2a073655b984567fb598d4e..907464c2a9f038642f551a5874c90cd1759df18b 100644 (file)
@@ -304,7 +304,6 @@ xprt_setup_rdma_bc(struct xprt_create *args)
        xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
 
        xprt->prot = XPRT_TRANSPORT_BC_RDMA;
-       xprt->tsh_size = 0;
        xprt->ops = &xprt_rdma_bc_procs;
 
        memcpy(&xprt->addr, args->dstaddr, args->addrlen);
index fbc171ebfe9172dd8eb9a29f35ce6ef3e9bdeea3..5d261353bd90228c58d7d40a9ccf6f1824acb45f 100644 (file)
@@ -332,7 +332,6 @@ xprt_setup_rdma(struct xprt_create *args)
        xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
 
        xprt->resvport = 0;             /* privileged port not needed */
-       xprt->tsh_size = 0;             /* RPC-RDMA handles framing */
        xprt->ops = &xprt_rdma_procs;
 
        /*
@@ -738,7 +737,6 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
                goto drop_connection;
 
        rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
-       rqst->rq_bytes_sent = 0;
 
        /* An RPC with no reply will throw off credit accounting,
         * so drop the connection to reset the credit grant.
index 21113bfd4ecaf6a3cb79329af81fb40168830d35..89a63391d4d442f6d390556aa8cf0b5a2a41357a 100644 (file)
@@ -1481,6 +1481,8 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
        if (ep->rep_receive_count > needed)
                goto out;
        needed -= ep->rep_receive_count;
+       if (!temp)
+               needed += RPCRDMA_MAX_RECV_BATCH;
 
        count = 0;
        wr = NULL;
index 5a18472f2c9ce5d2f16de6209b01559c2667ebf8..10f6593e1a6abc38a739491f462bde1f792cd4e7 100644 (file)
@@ -205,6 +205,16 @@ struct rpcrdma_rep {
        struct ib_recv_wr       rr_recv_wr;
 };
 
+/* To reduce the rate at which a transport invokes ib_post_recv
+ * (and thus the hardware doorbell rate), xprtrdma posts Receive
+ * WRs in batches.
+ *
+ * Setting this to zero disables Receive post batching.
+ */
+enum {
+       RPCRDMA_MAX_RECV_BATCH = 7,
+};
+
 /* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
  */
 struct rpcrdma_req;
@@ -577,7 +587,7 @@ void frwr_release_mr(struct rpcrdma_mr *mr);
 size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
 struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
                                struct rpcrdma_mr_seg *seg,
-                               int nsegs, bool writing, u32 xid,
+                               int nsegs, bool writing, __be32 xid,
                                struct rpcrdma_mr **mr);
 int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
 void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
index 7754aa3e434f405711cfce8d9bdfece972dfc75c..42f45d33dc5675ce5980c994a891f310bcd7c8b8 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/bvec.h>
 #include <linux/highmem.h>
 #include <linux/uio.h>
+#include <linux/sched/mm.h>
 
 #include <trace/events/sunrpc.h>
 
@@ -404,8 +405,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
        size_t want, seek_init = seek, offset = 0;
        ssize_t ret;
 
-       if (seek < buf->head[0].iov_len) {
-               want = min_t(size_t, count, buf->head[0].iov_len);
+       want = min_t(size_t, count, buf->head[0].iov_len);
+       if (seek < want) {
                ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
                if (ret <= 0)
                        goto sock_err;
@@ -416,13 +417,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
                        goto out;
                seek = 0;
        } else {
-               seek -= buf->head[0].iov_len;
-               offset += buf->head[0].iov_len;
+               seek -= want;
+               offset += want;
        }
 
        want = xs_alloc_sparse_pages(buf,
                        min_t(size_t, count - offset, buf->page_len),
-                       GFP_NOWAIT);
+                       GFP_KERNEL);
        if (seek < want) {
                ret = xs_read_bvec(sock, msg, flags, buf->bvec,
                                xdr_buf_pagecount(buf),
@@ -442,8 +443,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
                offset += want;
        }
 
-       if (seek < buf->tail[0].iov_len) {
-               want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+       want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+       if (seek < want) {
                ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
                if (ret <= 0)
                        goto sock_err;
@@ -453,7 +454,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
                if (ret != want)
                        goto out;
        } else
-               offset += buf->tail[0].iov_len;
+               offset = seek_init;
        ret = -EMSGSIZE;
 out:
        *read = offset - seek_init;
@@ -481,6 +482,14 @@ xs_read_stream_request_done(struct sock_xprt *transport)
        return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
 }
 
+static void
+xs_read_stream_check_eor(struct sock_xprt *transport,
+               struct msghdr *msg)
+{
+       if (xs_read_stream_request_done(transport))
+               msg->msg_flags |= MSG_EOR;
+}
+
 static ssize_t
 xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
                int flags, struct rpc_rqst *req)
@@ -492,17 +501,21 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
        xs_read_header(transport, buf);
 
        want = transport->recv.len - transport->recv.offset;
-       ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
-                       transport->recv.copied + want, transport->recv.copied,
-                       &read);
-       transport->recv.offset += read;
-       transport->recv.copied += read;
-       if (transport->recv.offset == transport->recv.len) {
-               if (xs_read_stream_request_done(transport))
-                       msg->msg_flags |= MSG_EOR;
-               return read;
+       if (want != 0) {
+               ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
+                               transport->recv.copied + want,
+                               transport->recv.copied,
+                               &read);
+               transport->recv.offset += read;
+               transport->recv.copied += read;
        }
 
+       if (transport->recv.offset == transport->recv.len)
+               xs_read_stream_check_eor(transport, msg);
+
+       if (want == 0)
+               return 0;
+
        switch (ret) {
        default:
                break;
@@ -655,13 +668,35 @@ out_err:
        return ret != 0 ? ret : -ESHUTDOWN;
 }
 
+static __poll_t xs_poll_socket(struct sock_xprt *transport)
+{
+       return transport->sock->ops->poll(transport->file, transport->sock,
+                       NULL);
+}
+
+static bool xs_poll_socket_readable(struct sock_xprt *transport)
+{
+       __poll_t events = xs_poll_socket(transport);
+
+       return (events & (EPOLLIN | EPOLLRDNORM)) && !(events & EPOLLRDHUP);
+}
+
+static void xs_poll_check_readable(struct sock_xprt *transport)
+{
+
+       clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+       if (!xs_poll_socket_readable(transport))
+               return;
+       if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+               queue_work(xprtiod_workqueue, &transport->recv_worker);
+}
+
 static void xs_stream_data_receive(struct sock_xprt *transport)
 {
        size_t read = 0;
        ssize_t ret = 0;
 
        mutex_lock(&transport->recv_mutex);
-       clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
        if (transport->sock == NULL)
                goto out;
        for (;;) {
@@ -671,6 +706,10 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
                read += ret;
                cond_resched();
        }
+       if (ret == -ESHUTDOWN)
+               kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+       else
+               xs_poll_check_readable(transport);
 out:
        mutex_unlock(&transport->recv_mutex);
        trace_xs_stream_read_data(&transport->xprt, ret, read);
@@ -680,7 +719,10 @@ static void xs_stream_data_receive_workfn(struct work_struct *work)
 {
        struct sock_xprt *transport =
                container_of(work, struct sock_xprt, recv_worker);
+       unsigned int pflags = memalloc_nofs_save();
+
        xs_stream_data_receive(transport);
+       memalloc_nofs_restore(pflags);
 }
 
 static void
@@ -690,65 +732,65 @@ xs_stream_reset_connect(struct sock_xprt *transport)
        transport->recv.len = 0;
        transport->recv.copied = 0;
        transport->xmit.offset = 0;
+}
+
+static void
+xs_stream_start_connect(struct sock_xprt *transport)
+{
        transport->xprt.stat.connect_count++;
        transport->xprt.stat.connect_start = jiffies;
 }
 
 #define XS_SENDMSG_FLAGS       (MSG_DONTWAIT | MSG_NOSIGNAL)
 
-static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
+static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek)
 {
-       struct msghdr msg = {
-               .msg_name       = addr,
-               .msg_namelen    = addrlen,
-               .msg_flags      = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0),
-       };
-       struct kvec iov = {
-               .iov_base       = vec->iov_base + base,
-               .iov_len        = vec->iov_len - base,
-       };
+       if (seek)
+               iov_iter_advance(&msg->msg_iter, seek);
+       return sock_sendmsg(sock, msg);
+}
 
-       if (iov.iov_len != 0)
-               return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
-       return kernel_sendmsg(sock, &msg, NULL, 0, 0);
+static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek)
+{
+       iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len);
+       return xs_sendmsg(sock, msg, seek);
 }
 
-static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p)
+static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base)
 {
-       ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
-                       int offset, size_t size, int flags);
-       struct page **ppage;
-       unsigned int remainder;
        int err;
 
-       remainder = xdr->page_len - base;
-       base += xdr->page_base;
-       ppage = xdr->pages + (base >> PAGE_SHIFT);
-       base &= ~PAGE_MASK;
-       do_sendpage = sock->ops->sendpage;
-       if (!zerocopy)
-               do_sendpage = sock_no_sendpage;
-       for(;;) {
-               unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
-               int flags = XS_SENDMSG_FLAGS;
+       err = xdr_alloc_bvec(xdr, GFP_KERNEL);
+       if (err < 0)
+               return err;
 
-               remainder -= len;
-               if (more)
-                       flags |= MSG_MORE;
-               if (remainder != 0)
-                       flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE;
-               err = do_sendpage(sock, *ppage, base, len, flags);
-               if (remainder == 0 || err != len)
-                       break;
-               *sent_p += err;
-               ppage++;
-               base = 0;
-       }
-       if (err > 0) {
-               *sent_p += err;
-               err = 0;
-       }
-       return err;
+       iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec,
+                       xdr_buf_pagecount(xdr),
+                       xdr->page_len + xdr->page_base);
+       return xs_sendmsg(sock, msg, base + xdr->page_base);
+}
+
+#define xs_record_marker_len() sizeof(rpc_fraghdr)
+
+/* Common case:
+ *  - stream transport
+ *  - sending from byte 0 of the message
+ *  - the message is wholly contained in @xdr's head iovec
+ */
+static int xs_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
+               rpc_fraghdr marker, struct kvec *vec, size_t base)
+{
+       struct kvec iov[2] = {
+               [0] = {
+                       .iov_base       = &marker,
+                       .iov_len        = sizeof(marker)
+               },
+               [1] = *vec,
+       };
+       size_t len = iov[0].iov_len + iov[1].iov_len;
+
+       iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len);
+       return xs_sendmsg(sock, msg, base);
 }
 
 /**
@@ -758,49 +800,60 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
  * @addrlen: UDP only -- length of destination address
  * @xdr: buffer containing this request
  * @base: starting position in the buffer
- * @zerocopy: true if it is safe to use sendpage()
+ * @rm: stream record marker field
  * @sent_p: return the total number of bytes successfully queued for sending
  *
  */
-static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p)
+static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, rpc_fraghdr rm, int *sent_p)
 {
-       unsigned int remainder = xdr->len - base;
+       struct msghdr msg = {
+               .msg_name = addr,
+               .msg_namelen = addrlen,
+               .msg_flags = XS_SENDMSG_FLAGS | MSG_MORE,
+       };
+       unsigned int rmsize = rm ? sizeof(rm) : 0;
+       unsigned int remainder = rmsize + xdr->len - base;
+       unsigned int want;
        int err = 0;
-       int sent = 0;
 
        if (unlikely(!sock))
                return -ENOTSOCK;
 
-       if (base != 0) {
-               addr = NULL;
-               addrlen = 0;
-       }
-
-       if (base < xdr->head[0].iov_len || addr != NULL) {
-               unsigned int len = xdr->head[0].iov_len - base;
+       want = xdr->head[0].iov_len + rmsize;
+       if (base < want) {
+               unsigned int len = want - base;
                remainder -= len;
-               err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
+               if (remainder == 0)
+                       msg.msg_flags &= ~MSG_MORE;
+               if (rmsize)
+                       err = xs_send_rm_and_kvec(sock, &msg, rm,
+                                       &xdr->head[0], base);
+               else
+                       err = xs_send_kvec(sock, &msg, &xdr->head[0], base);
                if (remainder == 0 || err != len)
                        goto out;
                *sent_p += err;
                base = 0;
        } else
-               base -= xdr->head[0].iov_len;
+               base -= want;
 
        if (base < xdr->page_len) {
                unsigned int len = xdr->page_len - base;
                remainder -= len;
-               err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent);
-               *sent_p += sent;
-               if (remainder == 0 || sent != len)
+               if (remainder == 0)
+                       msg.msg_flags &= ~MSG_MORE;
+               err = xs_send_pagedata(sock, &msg, xdr, base);
+               if (remainder == 0 || err != len)
                        goto out;
+               *sent_p += err;
                base = 0;
        } else
                base -= xdr->page_len;
 
        if (base >= xdr->tail[0].iov_len)
                return 0;
-       err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
+       msg.msg_flags &= ~MSG_MORE;
+       err = xs_send_kvec(sock, &msg, &xdr->tail[0], base);
 out:
        if (err > 0) {
                *sent_p += err;
@@ -856,7 +909,7 @@ static int xs_nospace(struct rpc_rqst *req)
 static void
 xs_stream_prepare_request(struct rpc_rqst *req)
 {
-       req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO);
+       req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
 }
 
 /*
@@ -870,13 +923,14 @@ xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req)
 }
 
 /*
- * Construct a stream transport record marker in @buf.
+ * Return the stream record marker field for a record of length <= 2^31-1
  */
-static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
+static rpc_fraghdr
+xs_stream_record_marker(struct xdr_buf *xdr)
 {
-       u32 reclen = buf->len - sizeof(rpc_fraghdr);
-       rpc_fraghdr *base = buf->head[0].iov_base;
-       *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen);
+       if (!xdr->len)
+               return 0;
+       return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len);
 }
 
 /**
@@ -905,15 +959,14 @@ static int xs_local_send_request(struct rpc_rqst *req)
                return -ENOTCONN;
        }
 
-       xs_encode_stream_record_marker(&req->rq_snd_buf);
-
        xs_pktdump("packet data:",
                        req->rq_svec->iov_base, req->rq_svec->iov_len);
 
        req->rq_xtime = ktime_get();
        status = xs_sendpages(transport->sock, NULL, 0, xdr,
                              transport->xmit.offset,
-                             true, &sent);
+                             xs_stream_record_marker(xdr),
+                             &sent);
        dprintk("RPC:       %s(%u) = %d\n",
                        __func__, xdr->len - transport->xmit.offset, status);
 
@@ -925,7 +978,6 @@ static int xs_local_send_request(struct rpc_rqst *req)
                req->rq_bytes_sent = transport->xmit.offset;
                if (likely(req->rq_bytes_sent >= req->rq_slen)) {
                        req->rq_xmit_bytes_sent += transport->xmit.offset;
-                       req->rq_bytes_sent = 0;
                        transport->xmit.offset = 0;
                        return 0;
                }
@@ -981,7 +1033,7 @@ static int xs_udp_send_request(struct rpc_rqst *req)
 
        req->rq_xtime = ktime_get();
        status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
-                             xdr, 0, true, &sent);
+                             xdr, 0, 0, &sent);
 
        dprintk("RPC:       xs_udp_send_request(%u) = %d\n",
                        xdr->len, status);
@@ -1045,7 +1097,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
        struct rpc_xprt *xprt = req->rq_xprt;
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        struct xdr_buf *xdr = &req->rq_snd_buf;
-       bool zerocopy = true;
        bool vm_wait = false;
        int status;
        int sent;
@@ -1057,17 +1108,9 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
                return -ENOTCONN;
        }
 
-       xs_encode_stream_record_marker(&req->rq_snd_buf);
-
        xs_pktdump("packet data:",
                                req->rq_svec->iov_base,
                                req->rq_svec->iov_len);
-       /* Don't use zero copy if this is a resend. If the RPC call
-        * completes while the socket holds a reference to the pages,
-        * then we may end up resending corrupted data.
-        */
-       if (req->rq_task->tk_flags & RPC_TASK_SENT)
-               zerocopy = false;
 
        if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
                xs_tcp_set_socket_timeouts(xprt, transport->sock);
@@ -1080,7 +1123,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
                sent = 0;
                status = xs_sendpages(transport->sock, NULL, 0, xdr,
                                      transport->xmit.offset,
-                                     zerocopy, &sent);
+                                     xs_stream_record_marker(xdr),
+                                     &sent);
 
                dprintk("RPC:       xs_tcp_send_request(%u) = %d\n",
                                xdr->len - transport->xmit.offset, status);
@@ -1091,7 +1135,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
                req->rq_bytes_sent = transport->xmit.offset;
                if (likely(req->rq_bytes_sent >= req->rq_slen)) {
                        req->rq_xmit_bytes_sent += transport->xmit.offset;
-                       req->rq_bytes_sent = 0;
                        transport->xmit.offset = 0;
                        return 0;
                }
@@ -1211,6 +1254,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
        struct socket *sock = transport->sock;
        struct sock *sk = transport->inet;
        struct rpc_xprt *xprt = &transport->xprt;
+       struct file *filp = transport->file;
 
        if (sk == NULL)
                return;
@@ -1224,6 +1268,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
        write_lock_bh(&sk->sk_callback_lock);
        transport->inet = NULL;
        transport->sock = NULL;
+       transport->file = NULL;
 
        sk->sk_user_data = NULL;
 
@@ -1231,10 +1276,12 @@ static void xs_reset_transport(struct sock_xprt *transport)
        xprt_clear_connected(xprt);
        write_unlock_bh(&sk->sk_callback_lock);
        xs_sock_reset_connection_flags(xprt);
+       /* Reset stream record info */
+       xs_stream_reset_connect(transport);
        mutex_unlock(&transport->recv_mutex);
 
        trace_rpc_socket_close(xprt, sock);
-       sock_release(sock);
+       fput(filp);
 
        xprt_disconnect_done(xprt);
 }
@@ -1358,7 +1405,6 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
        int err;
 
        mutex_lock(&transport->recv_mutex);
-       clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
        sk = transport->inet;
        if (sk == NULL)
                goto out;
@@ -1370,6 +1416,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
                consume_skb(skb);
                cond_resched();
        }
+       xs_poll_check_readable(transport);
 out:
        mutex_unlock(&transport->recv_mutex);
 }
@@ -1378,7 +1425,10 @@ static void xs_udp_data_receive_workfn(struct work_struct *work)
 {
        struct sock_xprt *transport =
                container_of(work, struct sock_xprt, recv_worker);
+       unsigned int pflags = memalloc_nofs_save();
+
        xs_udp_data_receive(transport);
+       memalloc_nofs_restore(pflags);
 }
 
 /**
@@ -1826,6 +1876,7 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
                struct sock_xprt *transport, int family, int type,
                int protocol, bool reuseport)
 {
+       struct file *filp;
        struct socket *sock;
        int err;
 
@@ -1846,6 +1897,11 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
                goto out;
        }
 
+       filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+       if (IS_ERR(filp))
+               return ERR_CAST(filp);
+       transport->file = filp;
+
        return sock;
 out:
        return ERR_PTR(err);
@@ -1869,7 +1925,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
                sk->sk_write_space = xs_udp_write_space;
                sock_set_flag(sk, SOCK_FASYNC);
                sk->sk_error_report = xs_error_report;
-               sk->sk_allocation = GFP_NOIO;
 
                xprt_clear_connected(xprt);
 
@@ -1880,7 +1935,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
                write_unlock_bh(&sk->sk_callback_lock);
        }
 
-       xs_stream_reset_connect(transport);
+       xs_stream_start_connect(transport);
 
        return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
 }
@@ -1892,6 +1947,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
 static int xs_local_setup_socket(struct sock_xprt *transport)
 {
        struct rpc_xprt *xprt = &transport->xprt;
+       struct file *filp;
        struct socket *sock;
        int status = -EIO;
 
@@ -1904,6 +1960,13 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
        }
        xs_reclassify_socket(AF_LOCAL, sock);
 
+       filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+       if (IS_ERR(filp)) {
+               status = PTR_ERR(filp);
+               goto out;
+       }
+       transport->file = filp;
+
        dprintk("RPC:       worker connecting xprt %p via AF_LOCAL to %s\n",
                        xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
 
@@ -2057,7 +2120,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                sk->sk_data_ready = xs_data_ready;
                sk->sk_write_space = xs_udp_write_space;
                sock_set_flag(sk, SOCK_FASYNC);
-               sk->sk_allocation = GFP_NOIO;
 
                xprt_set_connected(xprt);
 
@@ -2220,7 +2282,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                sk->sk_write_space = xs_tcp_write_space;
                sock_set_flag(sk, SOCK_FASYNC);
                sk->sk_error_report = xs_error_report;
-               sk->sk_allocation = GFP_NOIO;
 
                /* socket options */
                sock_reset_flag(sk, SOCK_LINGER);
@@ -2240,8 +2301,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 
        xs_set_memalloc(xprt);
 
-       /* Reset TCP record info */
-       xs_stream_reset_connect(transport);
+       xs_stream_start_connect(transport);
 
        /* Tell the socket layer to start connecting... */
        set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
@@ -2534,26 +2594,35 @@ static int bc_sendto(struct rpc_rqst *req)
 {
        int len;
        struct xdr_buf *xbufp = &req->rq_snd_buf;
-       struct rpc_xprt *xprt = req->rq_xprt;
        struct sock_xprt *transport =
-                               container_of(xprt, struct sock_xprt, xprt);
-       struct socket *sock = transport->sock;
+                       container_of(req->rq_xprt, struct sock_xprt, xprt);
        unsigned long headoff;
        unsigned long tailoff;
+       struct page *tailpage;
+       struct msghdr msg = {
+               .msg_flags      = MSG_MORE
+       };
+       rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
+                                        (u32)xbufp->len);
+       struct kvec iov = {
+               .iov_base       = &marker,
+               .iov_len        = sizeof(marker),
+       };
 
-       xs_encode_stream_record_marker(xbufp);
+       len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len);
+       if (len != iov.iov_len)
+               return -EAGAIN;
 
+       tailpage = NULL;
+       if (xbufp->tail[0].iov_len)
+               tailpage = virt_to_page(xbufp->tail[0].iov_base);
        tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
        headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
-       len = svc_send_common(sock, xbufp,
+       len = svc_send_common(transport->sock, xbufp,
                              virt_to_page(xbufp->head[0].iov_base), headoff,
-                             xbufp->tail[0].iov_base, tailoff);
-
-       if (len != xbufp->len) {
-               printk(KERN_NOTICE "Error sending entire callback!\n");
-               len = -EAGAIN;
-       }
-
+                             tailpage, tailoff);
+       if (len != xbufp->len)
+               return -EAGAIN;
        return len;
 }
 
@@ -2793,7 +2862,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
        transport = container_of(xprt, struct sock_xprt, xprt);
 
        xprt->prot = 0;
-       xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
        xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
        xprt->bind_timeout = XS_BIND_TO;
@@ -2862,7 +2930,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
        transport = container_of(xprt, struct sock_xprt, xprt);
 
        xprt->prot = IPPROTO_UDP;
-       xprt->tsh_size = 0;
        /* XXX: header size can vary due to auth type, IPv6, etc. */
        xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
@@ -2942,7 +3009,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
        transport = container_of(xprt, struct sock_xprt, xprt);
 
        xprt->prot = IPPROTO_TCP;
-       xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
        xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
        xprt->bind_timeout = XS_BIND_TO;
@@ -3015,7 +3081,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
        transport = container_of(xprt, struct sock_xprt, xprt);
 
        xprt->prot = IPPROTO_TCP;
-       xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
        xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
        xprt->timeout = &xs_tcp_default_timeout;