NFSv4: Add support for CB_RECALL_ANY for flexfiles layouts
author Trond Myklebust <trond.myklebust@hammerspace.com>
Tue, 18 Feb 2020 20:58:31 +0000 (15:58 -0500)
committer Trond Myklebust <trond.myklebust@hammerspace.com>
Mon, 16 Mar 2020 12:34:30 +0000 (08:34 -0400)
When we receive a CB_RECALL_ANY that asks us to return flexfiles
layouts, we iterate through all the layouts and look at whether or
not there are active open file descriptors that might need them
for I/O. If there are no such descriptors, we return the layouts.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
fs/nfs/callback.h
fs/nfs/callback_proc.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4state.c
fs/nfs/nfs4trace.h
fs/nfs/pnfs.c
fs/nfs/pnfs.h

index 549350259840876d340deb48b7604d09323a40b4..6a2033131c068e82e9934c6baba20c243dfad62f 100644 (file)
@@ -127,7 +127,9 @@ extern __be32 nfs4_callback_sequence(void *argp, void *resp,
 #define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX  9
 #define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
 #define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
-#define RCA4_TYPE_MASK_ALL 0xf31f
+#define PNFS_FF_RCA4_TYPE_MASK_READ 16
+#define PNFS_FF_RCA4_TYPE_MASK_RW 17
+#define RCA4_TYPE_MASK_ALL 0x3f31f
 
 struct cb_recallanyargs {
        uint32_t        craa_objs_to_keep;
index 97084804a95371b4ff04db45f994fff3f96a0325..e61dbc9b86ae25a759f1c617c2ed414bb5ad9975 100644 (file)
@@ -597,6 +597,7 @@ __be32 nfs4_callback_recallany(void *argp, void *resp,
        struct cb_recallanyargs *args = argp;
        __be32 status;
        fmode_t flags = 0;
+       bool schedule_manager = false;
 
        status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
        if (!cps->clp) /* set in cb_sequence */
@@ -619,6 +620,18 @@ __be32 nfs4_callback_recallany(void *argp, void *resp,
 
        if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT))
                pnfs_recall_all_layouts(cps->clp);
+
+       if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_READ)) {
+               set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &cps->clp->cl_state);
+               schedule_manager = true;
+       }
+       if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_RW)) {
+               set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_RW, &cps->clp->cl_state);
+               schedule_manager = true;
+       }
+       if (schedule_manager)
+               nfs4_schedule_state_manager(cps->clp);
+
 out:
        dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
        return status;
index 8be1ba7c62bb8f4074dff6ab98609b357d793f3d..2b7f6dcd2eb82e0adc9342b03a92dc396d4260b2 100644 (file)
@@ -42,7 +42,9 @@ enum nfs4_client_state {
        NFS4CLNT_LEASE_MOVED,
        NFS4CLNT_DELEGATION_EXPIRED,
        NFS4CLNT_RUN_MANAGER,
-       NFS4CLNT_DELEGRETURN_RUNNING,
+       NFS4CLNT_RECALL_RUNNING,
+       NFS4CLNT_RECALL_ANY_LAYOUT_READ,
+       NFS4CLNT_RECALL_ANY_LAYOUT_RW,
 };
 
 #define NFS4_RENEW_TIMEOUT             0x01
index f7723d221945b7aa7d9f629014a49e6d61e632e2..ac93715c05a49ba4fdd81e5b7b2c3a7a249212d7 100644 (file)
@@ -2524,6 +2524,21 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp)
        }
        return 0;
 }
+
+static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
+{
+       int iomode = 0;
+
+       if (test_and_clear_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &clp->cl_state))
+               iomode += IOMODE_READ;
+       if (test_and_clear_bit(NFS4CLNT_RECALL_ANY_LAYOUT_RW, &clp->cl_state))
+               iomode += IOMODE_RW;
+       /* Note: IOMODE_READ + IOMODE_RW == IOMODE_ANY */
+       if (iomode) {
+               pnfs_layout_return_unused_byclid(clp, iomode);
+               set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
+       }
+}
 #else /* CONFIG_NFS_V4_1 */
 static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
 
@@ -2531,6 +2546,10 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp)
 {
        return 0;
 }
+
+static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
+{
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static void nfs4_state_manager(struct nfs_client *clp)
@@ -2635,12 +2654,13 @@ static void nfs4_state_manager(struct nfs_client *clp)
                nfs4_end_drain_session(clp);
                nfs4_clear_state_manager_bit(clp);
 
-               if (!test_and_set_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state)) {
+               if (!test_and_set_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state)) {
                        if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
                                nfs_client_return_marked_delegations(clp);
                                set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
                        }
-                       clear_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state);
+                       nfs4_layoutreturn_any_run(clp);
+                       clear_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state);
                }
 
                /* Did we race with an attempt to give us more work? */
index 1e97e5e04cb43ba671c491b6433174e8c1552e0a..543541173a3d226c2db45ac1de51ff08a2490839 100644 (file)
@@ -584,7 +584,9 @@ TRACE_DEFINE_ENUM(NFS4CLNT_MOVED);
 TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_MOVED);
 TRACE_DEFINE_ENUM(NFS4CLNT_DELEGATION_EXPIRED);
 TRACE_DEFINE_ENUM(NFS4CLNT_RUN_MANAGER);
-TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_RUNNING);
+TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_RUNNING);
+TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_READ);
+TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_RW);
 
 #define show_nfs4_clp_state(state) \
        __print_flags(state, "|", \
@@ -605,7 +607,9 @@ TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_RUNNING);
                { NFS4CLNT_LEASE_MOVED,         "LEASE_MOVED" }, \
                { NFS4CLNT_DELEGATION_EXPIRED,  "DELEGATION_EXPIRED" }, \
                { NFS4CLNT_RUN_MANAGER,         "RUN_MANAGER" }, \
-               { NFS4CLNT_DELEGRETURN_RUNNING, "DELEGRETURN_RUNNING" })
+               { NFS4CLNT_RECALL_RUNNING,      "RECALL_RUNNING" }, \
+               { NFS4CLNT_RECALL_ANY_LAYOUT_READ, "RECALL_ANY_LAYOUT_READ" }, \
+               { NFS4CLNT_RECALL_ANY_LAYOUT_RW, "RECALL_ANY_LAYOUT_RW" })
 
 TRACE_EVENT(nfs4_state_mgr,
                TP_PROTO(
index 268e7b9ff54e34d092f983671540c8d0609cf64b..6b25117fca5f22d5379ec8445c7dc641c40508eb 100644 (file)
@@ -309,6 +309,16 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
        }
 }
 
+static struct inode *
+pnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+       struct inode *inode = igrab(lo->plh_inode);
+       if (inode)
+               return inode;
+       set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
+       return NULL;
+}
+
 static void
 pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
                         u32 seq)
@@ -782,7 +792,7 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
                /* If the sb is being destroyed, just bail */
                if (!nfs_sb_active(server->super))
                        break;
-               inode = igrab(lo->plh_inode);
+               inode = pnfs_grab_inode_layout_hdr(lo);
                if (inode != NULL) {
                        if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags))
                                list_del_rcu(&lo->plh_layouts);
@@ -795,7 +805,6 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
                } else {
                        rcu_read_unlock();
                        spin_unlock(&clp->cl_lock);
-                       set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
                }
                nfs_sb_deactive(server->super);
                spin_lock(&clp->cl_lock);
@@ -2434,29 +2443,26 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
        return -ENOENT;
 }
 
-void pnfs_error_mark_layout_for_return(struct inode *inode,
-                                      struct pnfs_layout_segment *lseg)
+static void
+pnfs_mark_layout_for_return(struct inode *inode,
+                           const struct pnfs_layout_range *range)
 {
-       struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
-       struct pnfs_layout_range range = {
-               .iomode = lseg->pls_range.iomode,
-               .offset = 0,
-               .length = NFS4_MAX_UINT64,
-       };
+       struct pnfs_layout_hdr *lo;
        bool return_now = false;
 
        spin_lock(&inode->i_lock);
+       lo = NFS_I(inode)->layout;
        if (!pnfs_layout_is_valid(lo)) {
                spin_unlock(&inode->i_lock);
                return;
        }
-       pnfs_set_plh_return_info(lo, range.iomode, 0);
+       pnfs_set_plh_return_info(lo, range->iomode, 0);
        /*
         * mark all matching lsegs so that we are sure to have no live
         * segments at hand when sending layoutreturn. See pnfs_put_lseg()
         * for how it works.
         */
-       if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0) != -EBUSY) {
+       if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0) != -EBUSY) {
                nfs4_stateid stateid;
                enum pnfs_iomode iomode;
 
@@ -2469,8 +2475,126 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
                nfs_commit_inode(inode, 0);
        }
 }
+
+void pnfs_error_mark_layout_for_return(struct inode *inode,
+                                      struct pnfs_layout_segment *lseg)
+{
+       struct pnfs_layout_range range = {
+               .iomode = lseg->pls_range.iomode,
+               .offset = 0,
+               .length = NFS4_MAX_UINT64,
+       };
+
+       pnfs_mark_layout_for_return(inode, &range);
+}
 EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
 
+static bool
+pnfs_layout_can_be_returned(struct pnfs_layout_hdr *lo)
+{
+       return pnfs_layout_is_valid(lo) &&
+               !test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) &&
+               !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
+}
+
+static struct pnfs_layout_segment *
+pnfs_find_first_lseg(struct pnfs_layout_hdr *lo,
+                    const struct pnfs_layout_range *range,
+                    enum pnfs_iomode iomode)
+{
+       struct pnfs_layout_segment *lseg;
+
+       list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+               if (!test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
+                       continue;
+               if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+                       continue;
+               if (lseg->pls_range.iomode != iomode && iomode != IOMODE_ANY)
+                       continue;
+               if (pnfs_lseg_range_intersecting(&lseg->pls_range, range))
+                       return lseg;
+       }
+       return NULL;
+}
+
+/* Find open file states whose mode matches that of the range */
+static bool
+pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
+                                const struct pnfs_layout_range *range)
+{
+       struct list_head *head;
+       struct nfs_open_context *ctx;
+       fmode_t mode = 0;
+
+       if (!pnfs_layout_can_be_returned(lo) ||
+           !pnfs_find_first_lseg(lo, range, range->iomode))
+               return false;
+
+       head = &NFS_I(lo->plh_inode)->open_files;
+       list_for_each_entry_rcu(ctx, head, list) {
+               if (ctx->state)
+                       mode |= ctx->state->state & (FMODE_READ|FMODE_WRITE);
+       }
+
+       switch (range->iomode) {
+       default:
+               break;
+       case IOMODE_READ:
+               mode &= ~FMODE_WRITE;
+               break;
+       case IOMODE_RW:
+               if (pnfs_find_first_lseg(lo, range, IOMODE_READ))
+                       mode &= ~FMODE_READ;
+       }
+       return mode == 0;
+}
+
+static int
+pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data)
+{
+       const struct pnfs_layout_range *range = data;
+       struct pnfs_layout_hdr *lo;
+       struct inode *inode;
+restart:
+       rcu_read_lock();
+       list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
+               if (!pnfs_layout_can_be_returned(lo) ||
+                   test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+                       continue;
+               inode = lo->plh_inode;
+               spin_lock(&inode->i_lock);
+               if (!pnfs_should_return_unused_layout(lo, range)) {
+                       spin_unlock(&inode->i_lock);
+                       continue;
+               }
+               spin_unlock(&inode->i_lock);
+               inode = pnfs_grab_inode_layout_hdr(lo);
+               if (!inode)
+                       continue;
+               rcu_read_unlock();
+               pnfs_mark_layout_for_return(inode, range);
+               iput(inode);
+               cond_resched();
+               goto restart;
+       }
+       rcu_read_unlock();
+       return 0;
+}
+
+void
+pnfs_layout_return_unused_byclid(struct nfs_client *clp,
+                                enum pnfs_iomode iomode)
+{
+       struct pnfs_layout_range range = {
+               .iomode = iomode,
+               .offset = 0,
+               .length = NFS4_MAX_UINT64,
+       };
+
+       nfs_client_for_each_server(clp, pnfs_layout_return_unused_byserver,
+                       &range);
+}
+
 void
 pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
 {
index 8df9aa02d336f0b4ed5b888ea3f657f667ad605d..7bfb6970134a1d75024141654b1fca85e83e73eb 100644 (file)
@@ -329,6 +329,9 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
 void pnfs_error_mark_layout_for_return(struct inode *inode,
                                       struct pnfs_layout_segment *lseg);
+void pnfs_layout_return_unused_byclid(struct nfs_client *clp,
+                                     enum pnfs_iomode iomode);
+
 /* nfs4_deviceid_flags */
 enum {
        NFS_DEVICEID_INVALID = 0,       /* set when MDS clientid recalled */