Merge tag 'nfsd-5.14' of git://linux-nfs.org/~bfields/linux
[sfrench/cifs-2.6.git] / fs / nfsd / nfs4state.c
index 7698172ac0c7619319393ed03c56b511f453d14e..fa67ecd5fe63f7d1502e2c135afe63914054bd03 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/jhash.h>
 #include <linux/string_helpers.h>
 #include <linux/fsnotify.h>
+#include <linux/nfs_ssc.h>
 #include "xdr4.h"
 #include "xdr4cb.h"
 #include "vfs.h"
@@ -354,6 +355,124 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
        .release        = nfsd4_cb_notify_lock_release,
 };
 
+/*
+ * We store the NONE, READ, WRITE, and BOTH bits separately in the
+ * st_{access,deny}_bmap field of the stateid, in order to track not
+ * only what share bits are currently in force, but also what
+ * combinations of share bits previous opens have used.  This allows us
+ * to enforce the recommendation of rfc 3530 14.2.19 that the server
+ * return an error if the client attempts to downgrade to a combination
+ * of share bits not explicable by closing some of its previous opens.
+ *
+ * XXX: This enforcement is actually incomplete, since we don't keep
+ * track of access/deny bit combinations; so, e.g., we allow:
+ *
+ *     OPEN allow read, deny write
+ *     OPEN allow both, deny none
+ *     DOWNGRADE allow read, deny none
+ *
+ * which we should reject.
+ */
+static unsigned int
+bmap_to_share_mode(unsigned long bmap)
+{
+       int i;
+       unsigned int access = 0;
+
+       for (i = 1; i < 4; i++) {
+               if (test_bit(i, &bmap))
+                       access |= i;
+       }
+       return access;
+}
+
+/* set share access for a given stateid */
+static inline void
+set_access(u32 access, struct nfs4_ol_stateid *stp)
+{
+       unsigned char mask = 1 << access;
+
+       WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
+       stp->st_access_bmap |= mask;
+}
+
+/* clear share access for a given stateid */
+static inline void
+clear_access(u32 access, struct nfs4_ol_stateid *stp)
+{
+       unsigned char mask = 1 << access;
+
+       WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
+       stp->st_access_bmap &= ~mask;
+}
+
+/* test whether a given stateid has access */
+static inline bool
+test_access(u32 access, struct nfs4_ol_stateid *stp)
+{
+       unsigned char mask = 1 << access;
+
+       return (bool)(stp->st_access_bmap & mask);
+}
+
+/* set share deny for a given stateid */
+static inline void
+set_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+       unsigned char mask = 1 << deny;
+
+       WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
+       stp->st_deny_bmap |= mask;
+}
+
+/* clear share deny for a given stateid */
+static inline void
+clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+       unsigned char mask = 1 << deny;
+
+       WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
+       stp->st_deny_bmap &= ~mask;
+}
+
+/* test whether a given stateid is denying specific access */
+static inline bool
+test_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+       unsigned char mask = 1 << deny;
+
+       return (bool)(stp->st_deny_bmap & mask);
+}
+
+static int nfs4_access_to_omode(u32 access)
+{
+       switch (access & NFS4_SHARE_ACCESS_BOTH) {
+       case NFS4_SHARE_ACCESS_READ:
+               return O_RDONLY;
+       case NFS4_SHARE_ACCESS_WRITE:
+               return O_WRONLY;
+       case NFS4_SHARE_ACCESS_BOTH:
+               return O_RDWR;
+       }
+       WARN_ON_ONCE(1);
+       return O_RDONLY;
+}
+
+static inline int
+access_permit_read(struct nfs4_ol_stateid *stp)
+{
+       return test_access(NFS4_SHARE_ACCESS_READ, stp) ||
+               test_access(NFS4_SHARE_ACCESS_BOTH, stp) ||
+               test_access(NFS4_SHARE_ACCESS_WRITE, stp);
+}
+
+static inline int
+access_permit_write(struct nfs4_ol_stateid *stp)
+{
+       return test_access(NFS4_SHARE_ACCESS_WRITE, stp) ||
+               test_access(NFS4_SHARE_ACCESS_BOTH, stp);
+}
+
 static inline struct nfs4_stateowner *
 nfs4_get_stateowner(struct nfs4_stateowner *sop)
 {
@@ -543,14 +662,12 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
 #define FILE_HASH_BITS                   8
 #define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)
 
-static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh)
+static unsigned int file_hashval(struct svc_fh *fh)
 {
-       return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0);
-}
+       struct inode *inode = d_inode(fh->fh_dentry);
 
-static unsigned int file_hashval(struct knfsd_fh *fh)
-{
-       return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1);
+       /* XXX: why not (here & in file cache) use inode? */
+       return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS);
 }
 
 static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
@@ -1152,108 +1269,6 @@ static unsigned int clientstr_hashval(struct xdr_netobj name)
        return opaque_hashval(name.data, 8) & CLIENT_HASH_MASK;
 }
 
-/*
- * We store the NONE, READ, WRITE, and BOTH bits separately in the
- * st_{access,deny}_bmap field of the stateid, in order to track not
- * only what share bits are currently in force, but also what
- * combinations of share bits previous opens have used.  This allows us
- * to enforce the recommendation of rfc 3530 14.2.19 that the server
- * return an error if the client attempt to downgrade to a combination
- * of share bits not explicable by closing some of its previous opens.
- *
- * XXX: This enforcement is actually incomplete, since we don't keep
- * track of access/deny bit combinations; so, e.g., we allow:
- *
- *     OPEN allow read, deny write
- *     OPEN allow both, deny none
- *     DOWNGRADE allow read, deny none
- *
- * which we should reject.
- */
-static unsigned int
-bmap_to_share_mode(unsigned long bmap) {
-       int i;
-       unsigned int access = 0;
-
-       for (i = 1; i < 4; i++) {
-               if (test_bit(i, &bmap))
-                       access |= i;
-       }
-       return access;
-}
-
-/* set share access for a given stateid */
-static inline void
-set_access(u32 access, struct nfs4_ol_stateid *stp)
-{
-       unsigned char mask = 1 << access;
-
-       WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
-       stp->st_access_bmap |= mask;
-}
-
-/* clear share access for a given stateid */
-static inline void
-clear_access(u32 access, struct nfs4_ol_stateid *stp)
-{
-       unsigned char mask = 1 << access;
-
-       WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
-       stp->st_access_bmap &= ~mask;
-}
-
-/* test whether a given stateid has access */
-static inline bool
-test_access(u32 access, struct nfs4_ol_stateid *stp)
-{
-       unsigned char mask = 1 << access;
-
-       return (bool)(stp->st_access_bmap & mask);
-}
-
-/* set share deny for a given stateid */
-static inline void
-set_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
-       unsigned char mask = 1 << deny;
-
-       WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
-       stp->st_deny_bmap |= mask;
-}
-
-/* clear share deny for a given stateid */
-static inline void
-clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
-       unsigned char mask = 1 << deny;
-
-       WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
-       stp->st_deny_bmap &= ~mask;
-}
-
-/* test whether a given stateid is denying specific access */
-static inline bool
-test_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
-       unsigned char mask = 1 << deny;
-
-       return (bool)(stp->st_deny_bmap & mask);
-}
-
-static int nfs4_access_to_omode(u32 access)
-{
-       switch (access & NFS4_SHARE_ACCESS_BOTH) {
-       case NFS4_SHARE_ACCESS_READ:
-               return O_RDONLY;
-       case NFS4_SHARE_ACCESS_WRITE:
-               return O_WRONLY;
-       case NFS4_SHARE_ACCESS_BOTH:
-               return O_RDWR;
-       }
-       WARN_ON_ONCE(1);
-       return O_RDONLY;
-}
-
 /*
  * A stateid that had a deny mode associated with it is being released
  * or downgraded. Recalculate the deny mode on the file.
@@ -1731,6 +1746,8 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u)
        struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user);
        struct nfs4_client *clp = c->cn_session->se_client;
 
+       trace_nfsd_cb_lost(clp);
+
        spin_lock(&clp->cl_lock);
        if (!list_empty(&c->cn_persession)) {
                list_del(&c->cn_persession);
@@ -2337,10 +2354,25 @@ static struct nfs4_client *get_nfsdfs_clp(struct inode *inode)
 static void seq_quote_mem(struct seq_file *m, char *data, int len)
 {
        seq_printf(m, "\"");
-       seq_escape_mem_ascii(m, data, len);
+       seq_escape_mem(m, data, len, ESCAPE_HEX | ESCAPE_NAP | ESCAPE_APPEND, "\"\\");
        seq_printf(m, "\"");
 }
 
+static const char *cb_state2str(int state)
+{
+       switch (state) {
+       case NFSD4_CB_UP:
+               return "UP";
+       case NFSD4_CB_UNKNOWN:
+               return "UNKNOWN";
+       case NFSD4_CB_DOWN:
+               return "DOWN";
+       case NFSD4_CB_FAULT:
+               return "FAULT";
+       }
+       return "UNDEFINED";
+}
+
 static int client_info_show(struct seq_file *m, void *v)
 {
        struct inode *inode = m->private;
@@ -2369,6 +2401,8 @@ static int client_info_show(struct seq_file *m, void *v)
                seq_printf(m, "\nImplementation time: [%lld, %ld]\n",
                        clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
        }
+       seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
+       seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
        drop_client(clp);
 
        return 0;
@@ -2651,6 +2685,8 @@ static void force_expire_client(struct nfs4_client *clp)
        struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
        bool already_expired;
 
+       trace_nfsd_clid_admin_expired(&clp->cl_clientid);
+
        spin_lock(&clp->cl_lock);
        clp->cl_time = 0;
        spin_unlock(&clp->cl_lock);
@@ -2802,14 +2838,11 @@ move_to_confirmed(struct nfs4_client *clp)
 
        lockdep_assert_held(&nn->client_lock);
 
-       dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
        list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
        rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
        add_clp_to_name_tree(clp, &nn->conf_name_tree);
-       if (!test_and_set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags) &&
-           clp->cl_nfsd_dentry &&
-           clp->cl_nfsd_info_dentry)
-               fsnotify_dentry(clp->cl_nfsd_info_dentry, FS_MODIFY);
+       set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
+       trace_nfsd_clid_confirmed(&clp->cl_clientid);
        renew_client_locked(clp);
 }
 
@@ -3125,6 +3158,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        goto out_nolock;
                }
                new->cl_mach_cred = true;
+               break;
        case SP4_NONE:
                break;
        default:                                /* checked by xdr code */
@@ -3161,20 +3195,24 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        }
                        /* case 6 */
                        exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
+                       trace_nfsd_clid_confirmed_r(conf);
                        goto out_copy;
                }
                if (!creds_match) { /* case 3 */
                        if (client_has_state(conf)) {
                                status = nfserr_clid_inuse;
+                               trace_nfsd_clid_cred_mismatch(conf, rqstp);
                                goto out;
                        }
                        goto out_new;
                }
                if (verfs_match) { /* case 2 */
                        conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
+                       trace_nfsd_clid_confirmed_r(conf);
                        goto out_copy;
                }
                /* case 5, client reboot */
+               trace_nfsd_clid_verf_mismatch(conf, rqstp, &verf);
                conf = NULL;
                goto out_new;
        }
@@ -3184,16 +3222,19 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                goto out;
        }
 
-       unconf  = find_unconfirmed_client_by_name(&exid->clname, nn);
+       unconf = find_unconfirmed_client_by_name(&exid->clname, nn);
        if (unconf) /* case 4, possible retry or client restart */
                unhash_client_locked(unconf);
 
-       /* case 1 (normal case) */
+       /* case 1, new owner ID */
+       trace_nfsd_clid_fresh(new);
+
 out_new:
        if (conf) {
                status = mark_client_expired_locked(conf);
                if (status)
                        goto out;
+               trace_nfsd_clid_replaced(&conf->cl_clientid);
        }
        new->cl_minorversion = cstate->minorversion;
        new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
@@ -3217,8 +3258,10 @@ out:
 out_nolock:
        if (new)
                expire_client(new);
-       if (unconf)
+       if (unconf) {
+               trace_nfsd_clid_expire_unconf(&unconf->cl_clientid);
                expire_client(unconf);
+       }
        return status;
 }
 
@@ -3410,9 +3453,10 @@ nfsd4_create_session(struct svc_rqst *rqstp,
                        goto out_free_conn;
                }
        } else if (unconf) {
+               status = nfserr_clid_inuse;
                if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
                    !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
-                       status = nfserr_clid_inuse;
+                       trace_nfsd_clid_cred_mismatch(unconf, rqstp);
                        goto out_free_conn;
                }
                status = nfserr_wrong_cred;
@@ -3432,6 +3476,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
                                old = NULL;
                                goto out_free_conn;
                        }
+                       trace_nfsd_clid_replaced(&old->cl_clientid);
                }
                move_to_confirmed(unconf);
                conf = unconf;
@@ -3456,6 +3501,8 @@ nfsd4_create_session(struct svc_rqst *rqstp,
        /* cache solo and embedded create sessions under the client_lock */
        nfsd4_cache_create_session(cr_ses, cs_slot, status);
        spin_unlock(&nn->client_lock);
+       if (conf == unconf)
+               fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY);
        /* init connection and backchannel */
        nfsd4_init_conn(rqstp, conn, new);
        nfsd4_put_session(new);
@@ -3889,6 +3936,7 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp,
                status = nfserr_wrong_cred;
                goto out;
        }
+       trace_nfsd_clid_destroyed(&clp->cl_clientid);
        unhash_client_locked(clp);
 out:
        spin_unlock(&nn->client_lock);
@@ -3931,6 +3979,7 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp,
                goto out;
 
        status = nfs_ok;
+       trace_nfsd_clid_reclaim_complete(&clp->cl_clientid);
        nfsd4_client_record_create(clp);
        inc_reclaim_complete(clp);
 out:
@@ -3952,27 +4001,29 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        new = create_client(clname, rqstp, &clverifier);
        if (new == NULL)
                return nfserr_jukebox;
-       /* Cases below refer to rfc 3530 section 14.2.33: */
        spin_lock(&nn->client_lock);
        conf = find_confirmed_client_by_name(&clname, nn);
        if (conf && client_has_state(conf)) {
-               /* case 0: */
                status = nfserr_clid_inuse;
                if (clp_used_exchangeid(conf))
                        goto out;
                if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
-                       trace_nfsd_clid_inuse_err(conf);
+                       trace_nfsd_clid_cred_mismatch(conf, rqstp);
                        goto out;
                }
        }
        unconf = find_unconfirmed_client_by_name(&clname, nn);
        if (unconf)
                unhash_client_locked(unconf);
-       /* We need to handle only case 1: probable callback update */
-       if (conf && same_verf(&conf->cl_verifier, &clverifier)) {
-               copy_clid(new, conf);
-               gen_confirm(new, nn);
-       }
+       if (conf) {
+               if (same_verf(&conf->cl_verifier, &clverifier)) {
+                       copy_clid(new, conf);
+                       gen_confirm(new, nn);
+               } else
+                       trace_nfsd_clid_verf_mismatch(conf, rqstp,
+                                                     &clverifier);
+       } else
+               trace_nfsd_clid_fresh(new);
        new->cl_minorversion = 0;
        gen_callback(new, setclid, rqstp);
        add_to_unconfirmed(new);
@@ -3985,12 +4036,13 @@ out:
        spin_unlock(&nn->client_lock);
        if (new)
                free_client(new);
-       if (unconf)
+       if (unconf) {
+               trace_nfsd_clid_expire_unconf(&unconf->cl_clientid);
                expire_client(unconf);
+       }
        return status;
 }
 
-
 __be32
 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
                        struct nfsd4_compound_state *cstate,
@@ -4019,25 +4071,27 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
         * Nevertheless, RFC 7530 recommends INUSE for this case:
         */
        status = nfserr_clid_inuse;
-       if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred))
+       if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
+               trace_nfsd_clid_cred_mismatch(unconf, rqstp);
                goto out;
-       if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred))
+       }
+       if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
+               trace_nfsd_clid_cred_mismatch(conf, rqstp);
                goto out;
-       /* cases below refer to rfc 3530 section 14.2.34: */
+       }
        if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) {
                if (conf && same_verf(&confirm, &conf->cl_confirm)) {
-                       /* case 2: probable retransmit */
                        status = nfs_ok;
-               } else /* case 4: client hasn't noticed we rebooted yet? */
+               } else
                        status = nfserr_stale_clientid;
                goto out;
        }
        status = nfs_ok;
-       if (conf) { /* case 1: callback update */
+       if (conf) {
                old = unconf;
                unhash_client_locked(old);
                nfsd4_change_callback(conf, &unconf->cl_cb_conn);
-       } else { /* case 3: normal case; new or rebooted client */
+       } else {
                old = find_confirmed_client_by_name(&unconf->cl_name, nn);
                if (old) {
                        status = nfserr_clid_inuse;
@@ -4050,12 +4104,15 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
                                old = NULL;
                                goto out;
                        }
+                       trace_nfsd_clid_replaced(&old->cl_clientid);
                }
                move_to_confirmed(unconf);
                conf = unconf;
        }
        get_client_locked(conf);
        spin_unlock(&nn->client_lock);
+       if (conf == unconf)
+               fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY);
        nfsd4_probe_callback(conf);
        spin_lock(&nn->client_lock);
        put_client_renew_locked(conf);
@@ -4072,7 +4129,7 @@ static struct nfs4_file *nfsd4_alloc_file(void)
 }
 
 /* OPEN Share state helper functions */
-static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
+static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval,
                                struct nfs4_file *fp)
 {
        lockdep_assert_held(&state_lock);
@@ -4082,12 +4139,14 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
        INIT_LIST_HEAD(&fp->fi_stateids);
        INIT_LIST_HEAD(&fp->fi_delegations);
        INIT_LIST_HEAD(&fp->fi_clnt_odstate);
-       fh_copy_shallow(&fp->fi_fhandle, fh);
+       fh_copy_shallow(&fp->fi_fhandle, &fh->fh_handle);
        fp->fi_deleg_file = NULL;
        fp->fi_had_conflict = false;
        fp->fi_share_deny = 0;
        memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
        memset(fp->fi_access, 0, sizeof(fp->fi_access));
+       fp->fi_aliased = false;
+       fp->fi_inode = d_inode(fh->fh_dentry);
 #ifdef CONFIG_NFSD_PNFS
        INIT_LIST_HEAD(&fp->fi_lo_states);
        atomic_set(&fp->fi_lo_recalls, 0);
@@ -4426,13 +4485,13 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
 
 /* search file_hashtbl[] for file */
 static struct nfs4_file *
-find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
+find_file_locked(struct svc_fh *fh, unsigned int hashval)
 {
        struct nfs4_file *fp;
 
        hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
                                lockdep_is_held(&state_lock)) {
-               if (fh_match(&fp->fi_fhandle, fh)) {
+               if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
                        if (refcount_inc_not_zero(&fp->fi_ref))
                                return fp;
                }
@@ -4440,8 +4499,32 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
        return NULL;
 }
 
-struct nfs4_file *
-find_file(struct knfsd_fh *fh)
+static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh,
+                                    unsigned int hashval)
+{
+       struct nfs4_file *fp;
+       struct nfs4_file *ret = NULL;
+       bool alias_found = false;
+
+       spin_lock(&state_lock);
+       hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
+                                lockdep_is_held(&state_lock)) {
+               if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
+                       if (refcount_inc_not_zero(&fp->fi_ref))
+                               ret = fp;
+               } else if (d_inode(fh->fh_dentry) == fp->fi_inode)
+                       fp->fi_aliased = alias_found = true;
+       }
+       if (likely(ret == NULL)) {
+               nfsd4_init_file(fh, hashval, new);
+               new->fi_aliased = alias_found;
+               ret = new;
+       }
+       spin_unlock(&state_lock);
+       return ret;
+}
+
+static struct nfs4_file * find_file(struct svc_fh *fh)
 {
        struct nfs4_file *fp;
        unsigned int hashval = file_hashval(fh);
@@ -4453,7 +4536,7 @@ find_file(struct knfsd_fh *fh)
 }
 
 static struct nfs4_file *
-find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
+find_or_add_file(struct nfs4_file *new, struct svc_fh *fh)
 {
        struct nfs4_file *fp;
        unsigned int hashval = file_hashval(fh);
@@ -4464,15 +4547,7 @@ find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
        if (fp)
                return fp;
 
-       spin_lock(&state_lock);
-       fp = find_file_locked(fh, hashval);
-       if (likely(fp == NULL)) {
-               nfsd4_init_file(fh, hashval, new);
-               fp = new;
-       }
-       spin_unlock(&state_lock);
-
-       return fp;
+       return insert_file(new, fh, hashval);
 }
 
 /*
@@ -4485,7 +4560,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
        struct nfs4_file *fp;
        __be32 ret = nfs_ok;
 
-       fp = find_file(&current_fh->fh_handle);
+       fp = find_file(current_fh);
        if (!fp)
                return ret;
        /* Check for conflicting share reservations */
@@ -4585,7 +4660,7 @@ nfsd_break_deleg_cb(struct file_lock *fl)
        struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
        struct nfs4_file *fp = dp->dl_stid.sc_file;
 
-       trace_nfsd_deleg_break(&dp->dl_stid.sc_stateid);
+       trace_nfsd_cb_recall(&dp->dl_stid);
 
        /*
         * We don't want the locks code to timeout the lease for us;
@@ -4880,6 +4955,11 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
        if (nf)
                nfsd_file_put(nf);
 
+       status = nfserrno(nfsd_open_break_lease(cur_fh->fh_dentry->d_inode,
+                                                               access));
+       if (status)
+               goto out_put_access;
+
        status = nfsd4_truncate(rqstp, cur_fh, open);
        if (status)
                goto out_put_access;
@@ -4951,6 +5031,65 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
        return fl;
 }
 
+static int nfsd4_check_conflicting_opens(struct nfs4_client *clp,
+                                        struct nfs4_file *fp)
+{
+       struct nfs4_ol_stateid *st;
+       struct file *f = fp->fi_deleg_file->nf_file;
+       struct inode *ino = locks_inode(f);
+       int writes;
+
+       writes = atomic_read(&ino->i_writecount);
+       if (!writes)
+               return 0;
+       /*
+        * There could be multiple filehandles (hence multiple
+        * nfs4_files) referencing this file, but that's not too
+        * common; let's just give up in that case rather than
+        * trying to go look up all the clients using that other
+        * nfs4_file as well:
+        */
+       if (fp->fi_aliased)
+               return -EAGAIN;
+       /*
+        * If there's a close in progress, make sure that we see it
+        * clear any fi_fds[] entries before we see it decrement
+        * i_writecount:
+        */
+       smp_mb__after_atomic();
+
+       if (fp->fi_fds[O_WRONLY])
+               writes--;
+       if (fp->fi_fds[O_RDWR])
+               writes--;
+       if (writes > 0)
+               return -EAGAIN; /* There may be non-NFSv4 writers */
+       /*
+        * It's possible there are non-NFSv4 write opens in progress,
+        * but if they haven't incremented i_writecount yet then they
+        * also haven't called break lease yet; so, they'll break this
+        * lease soon enough.  So, all that's left to check for is NFSv4
+        * opens:
+        */
+       spin_lock(&fp->fi_lock);
+       list_for_each_entry(st, &fp->fi_stateids, st_perfile) {
+               if (st->st_openstp == NULL /* it's an open */ &&
+                   access_permit_write(st) &&
+                   st->st_stid.sc_client != clp) {
+                       spin_unlock(&fp->fi_lock);
+                       return -EAGAIN;
+               }
+       }
+       spin_unlock(&fp->fi_lock);
+       /*
+        * There's a small chance that we could be racing with another
+        * NFSv4 open.  However, any open that hasn't added itself to
+        * the fi_stateids list also hasn't called break_lease yet; so,
+        * they'll break this lease soon enough.
+        */
+       return 0;
+}
+
 static struct nfs4_delegation *
 nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
                    struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
@@ -4970,9 +5109,12 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 
        nf = find_readable_file(fp);
        if (!nf) {
-               /* We should always have a readable file here */
-               WARN_ON_ONCE(1);
-               return ERR_PTR(-EBADF);
+               /*
+                * We probably could attempt another open and get a read
+                * delegation, but for now, don't bother until the
+                * client actually sends us one.
+                */
+               return ERR_PTR(-EAGAIN);
        }
        spin_lock(&state_lock);
        spin_lock(&fp->fi_lock);
@@ -5007,6 +5149,9 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
                locks_free_lock(fl);
        if (status)
                goto out_clnt_odstate;
+       status = nfsd4_check_conflicting_opens(clp, fp);
+       if (status)
+               goto out_unlock;
 
        spin_lock(&state_lock);
        spin_lock(&fp->fi_lock);
@@ -5088,17 +5233,6 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
                                goto out_no_deleg;
                        if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
                                goto out_no_deleg;
-                       /*
-                        * Also, if the file was opened for write or
-                        * create, there's a good chance the client's
-                        * about to write to it, resulting in an
-                        * immediate recall (since we don't support
-                        * write delegations):
-                        */
-                       if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
-                               goto out_no_deleg;
-                       if (open->op_create == NFS4_OPEN_CREATE)
-                               goto out_no_deleg;
                        break;
                default:
                        goto out_no_deleg;
@@ -5161,7 +5295,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
         * and check for delegations in the process of being recalled.
         * If not found, create the nfs4_file struct
         */
-       fp = find_or_add_file(open->op_file, &current_fh->fh_handle);
+       fp = find_or_add_file(open->op_file, current_fh);
        if (fp != open->op_file) {
                status = nfs4_check_deleg(cl, open, &dp);
                if (status)
@@ -5365,6 +5499,69 @@ static bool state_expired(struct laundry_time *lt, time64_t last_refresh)
        return false;
 }
 
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
+{
+       spin_lock_init(&nn->nfsd_ssc_lock);
+       INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list);
+       init_waitqueue_head(&nn->nfsd_ssc_waitq);
+}
+EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
+
+/*
+ * This is called when nfsd is being shut down, after all inter_ssc
+ * cleanup has been done, to destroy the ssc delayed unmount list.
+ */
+static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
+{
+       struct nfsd4_ssc_umount_item *ni = NULL;
+       struct nfsd4_ssc_umount_item *tmp;
+
+       spin_lock(&nn->nfsd_ssc_lock);
+       list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) {
+               list_del(&ni->nsui_list);
+               spin_unlock(&nn->nfsd_ssc_lock);
+               mntput(ni->nsui_vfsmount);
+               kfree(ni);
+               spin_lock(&nn->nfsd_ssc_lock);
+       }
+       spin_unlock(&nn->nfsd_ssc_lock);
+}
+
+static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
+{
+       bool do_wakeup = false;
+       struct nfsd4_ssc_umount_item *ni = NULL;
+       struct nfsd4_ssc_umount_item *tmp;
+
+       spin_lock(&nn->nfsd_ssc_lock);
+       list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) {
+               if (time_after(jiffies, ni->nsui_expire)) {
+                       if (refcount_read(&ni->nsui_refcnt) > 1)
+                               continue;
+
+                       /* mark the item busy; the lock is dropped around mntput() */
+                       ni->nsui_busy = true;
+                       spin_unlock(&nn->nfsd_ssc_lock);
+                       mntput(ni->nsui_vfsmount);
+                       spin_lock(&nn->nfsd_ssc_lock);
+
+                       /* waiters need to restart from the beginning of the list */
+                       list_del(&ni->nsui_list);
+                       kfree(ni);
+
+                       /* wake up ssc_connect waiters once the scan is done */
+                       do_wakeup = true;
+                       continue;
+               }
+               break;
+       }
+       if (do_wakeup)
+               wake_up_all(&nn->nfsd_ssc_waitq);
+       spin_unlock(&nn->nfsd_ssc_lock);
+}
+#endif
+
 static time64_t
 nfs4_laundromat(struct nfsd_net *nn)
 {
@@ -5403,10 +5600,8 @@ nfs4_laundromat(struct nfsd_net *nn)
                clp = list_entry(pos, struct nfs4_client, cl_lru);
                if (!state_expired(&lt, clp->cl_time))
                        break;
-               if (mark_client_expired_locked(clp)) {
-                       trace_nfsd_clid_expired(&clp->cl_clientid);
+               if (mark_client_expired_locked(clp))
                        continue;
-               }
                list_add(&clp->cl_lru, &reaplist);
        }
        spin_unlock(&nn->client_lock);
@@ -5476,6 +5671,10 @@ nfs4_laundromat(struct nfsd_net *nn)
                list_del_init(&nbl->nbl_lru);
                free_blocked_lock(nbl);
        }
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+       /* service the server-to-server copy delayed unmount list */
+       nfsd4_ssc_expire_umount(nn);
+#endif
 out:
        return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
 }
@@ -5502,21 +5701,6 @@ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
        return nfs_ok;
 }
 
-static inline int
-access_permit_read(struct nfs4_ol_stateid *stp)
-{
-       return test_access(NFS4_SHARE_ACCESS_READ, stp) ||
-               test_access(NFS4_SHARE_ACCESS_BOTH, stp) ||
-               test_access(NFS4_SHARE_ACCESS_WRITE, stp);
-}
-
-static inline int
-access_permit_write(struct nfs4_ol_stateid *stp)
-{
-       return test_access(NFS4_SHARE_ACCESS_WRITE, stp) ||
-               test_access(NFS4_SHARE_ACCESS_BOTH, stp);
-}
-
 static
 __be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags)
 {
@@ -6288,15 +6472,6 @@ out:
        return status;
 }
 
-static inline u64
-end_offset(u64 start, u64 len)
-{
-       u64 end;
-
-       end = start + len;
-       return end >= start ? end: NFS4_MAX_UINT64;
-}
-
 /* last octet in a range */
 static inline u64
 last_byte_offset(u64 start, u64 len)
@@ -6362,8 +6537,10 @@ nfsd4_lm_notify(struct file_lock *fl)
        }
        spin_unlock(&nn->blocked_locks_lock);
 
-       if (queue)
+       if (queue) {
+               trace_nfsd_cb_notify_lock(lo, nbl);
                nfsd4_run_cb(&nbl->nbl_cb);
+       }
 }
 
 static const struct lock_manager_operations nfsd_posix_mng_ops  = {
@@ -6865,11 +7042,20 @@ out:
 static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
 {
        struct nfsd_file *nf;
-       __be32 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
-       if (!err) {
-               err = nfserrno(vfs_test_lock(nf->nf_file, lock));
-               nfsd_file_put(nf);
-       }
+       __be32 err;
+
+       err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
+       if (err)
+               return err;
+       fh_lock(fhp); /* to block new leases till after test_lock: */
+       err = nfserrno(nfsd_open_break_lease(fhp->fh_dentry->d_inode,
+                                                       NFSD_MAY_READ));
+       if (err)
+               goto out;
+       err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+out:
+       fh_unlock(fhp);
+       nfsd_file_put(nf);
        return err;
 }
 
@@ -7152,7 +7338,6 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
        unsigned int strhashval;
        struct nfs4_client_reclaim *crp;
 
-       trace_nfsd_clid_reclaim(nn, name.len, name.data);
        crp = alloc_reclaim();
        if (crp) {
                strhashval = clientstr_hashval(name);
@@ -7202,8 +7387,6 @@ nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn)
        unsigned int strhashval;
        struct nfs4_client_reclaim *crp = NULL;
 
-       trace_nfsd_clid_find(nn, name.len, name.data);
-
        strhashval = clientstr_hashval(name);
        list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) {
                if (compare_blob(&crp->cr_name, &name) == 0) {
@@ -7409,6 +7592,9 @@ nfs4_state_shutdown_net(struct net *net)
 
        nfsd4_client_tracking_exit(net);
        nfs4_state_destroy_net(net);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+       nfsd4_ssc_shutdown_umount(nn);
+#endif
 }
 
 void