NFSv4/flexfiles: Abort I/O early if the layout segment was invalidated
authorTrond Myklebust <trond.myklebust@hammerspace.com>
Wed, 27 Feb 2019 20:37:36 +0000 (15:37 -0500)
committerTrond Myklebust <trond.myklebust@hammerspace.com>
Fri, 1 Mar 2019 21:20:16 +0000 (16:20 -0500)
If a layout segment gets invalidated while a pNFS I/O operation
is queued for transmission, then we ideally want to abort
immediately. This is particularly the case when there is a large
number of I/O related RPCs queued in the RPC layer, and the layout
segment gets invalidated due to an ENOSPC error, or an EACCES (because
the client was fenced). We may end up forced to spam the MDS with a
lot of otherwise unnecessary LAYOUTERRORs after that I/O fails.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
fs/nfs/flexfilelayout/flexfilelayout.c
include/linux/sunrpc/sched.h
net/sunrpc/xprt.c

index 244a03c22b312ff0621ded3f0bd5b6c2116f403a..a8e9bdd978e74f5ea16a0618eb561f4cba95c67f 100644 (file)
@@ -1071,6 +1071,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
                break;
        case -NFS4ERR_RETRY_UNCACHED_REP:
                break;
+       case -EAGAIN:
+               return -NFS4ERR_RESET_TO_PNFS;
        /* Invalidate Layout errors */
        case -NFS4ERR_PNFS_NO_LAYOUT:
        case -ESTALE:           /* mapped NFS4ERR_STALE */
@@ -1131,6 +1133,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
        case -EBADHANDLE:
        case -ELOOP:
        case -ENOSPC:
+       case -EAGAIN:
                break;
        case -EJUKEBOX:
                nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
@@ -1369,6 +1372,16 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
        ff_layout_read_prepare_common(task, hdr);
 }
 
+static void
+ff_layout_io_prepare_transmit(struct rpc_task *task,
+               void *data)
+{
+       struct nfs_pgio_header *hdr = data;
+
+       if (!pnfs_is_valid_lseg(hdr->lseg))
+               rpc_exit(task, -EAGAIN);
+}
+
 static void ff_layout_read_call_done(struct rpc_task *task, void *data)
 {
        struct nfs_pgio_header *hdr = data;
@@ -1657,6 +1670,7 @@ static void ff_layout_commit_release(void *data)
 
 static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
        .rpc_call_prepare = ff_layout_read_prepare_v3,
+       .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
        .rpc_call_done = ff_layout_read_call_done,
        .rpc_count_stats = ff_layout_read_count_stats,
        .rpc_release = ff_layout_read_release,
@@ -1664,6 +1678,7 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
 
 static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
        .rpc_call_prepare = ff_layout_read_prepare_v4,
+       .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
        .rpc_call_done = ff_layout_read_call_done,
        .rpc_count_stats = ff_layout_read_count_stats,
        .rpc_release = ff_layout_read_release,
@@ -1671,6 +1686,7 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
 
 static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
        .rpc_call_prepare = ff_layout_write_prepare_v3,
+       .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
        .rpc_call_done = ff_layout_write_call_done,
        .rpc_count_stats = ff_layout_write_count_stats,
        .rpc_release = ff_layout_write_release,
@@ -1678,6 +1694,7 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
 
 static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
        .rpc_call_prepare = ff_layout_write_prepare_v4,
+       .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
        .rpc_call_done = ff_layout_write_call_done,
        .rpc_count_stats = ff_layout_write_count_stats,
        .rpc_release = ff_layout_write_release,
index 219aa3910a0c10fa6013c7df83013b5802d186ee..52d41d0c1ae1d54b6829a10134318ee2835b9fd5 100644 (file)
@@ -97,6 +97,7 @@ typedef void                  (*rpc_action)(struct rpc_task *);
 
 struct rpc_call_ops {
        void (*rpc_call_prepare)(struct rpc_task *, void *);
+       void (*rpc_call_prepare_transmit)(struct rpc_task *, void *);
        void (*rpc_call_done)(struct rpc_task *, void *);
        void (*rpc_count_stats)(struct rpc_task *, void *);
        void (*rpc_release)(void *);
index 1cf4e379be7b3c465d416a97ecc2e100dd1d3d14..e096c5a725dff123a347741bffcfdef13227218f 100644 (file)
@@ -1330,6 +1330,13 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
                        status = -EBADMSG;
                        goto out_dequeue;
                }
+               if (task->tk_ops->rpc_call_prepare_transmit) {
+                       task->tk_ops->rpc_call_prepare_transmit(task,
+                                       task->tk_calldata);
+                       status = task->tk_status;
+                       if (status < 0)
+                               goto out_dequeue;
+               }
        }
 
        /*