fix mntput/mntput race

author Al Viro <viro@zeniv.linux.org.uk>

Thu, 9 Aug 2018 21:21:17 +0000 (17:21 -0400)

committer Al Viro <viro@zeniv.linux.org.uk>

Thu, 9 Aug 2018 21:21:17 +0000 (17:21 -0400)
author Al Viro <viro@zeniv.linux.org.uk>
Thu, 9 Aug 2018 21:21:17 +0000 (17:21 -0400)
committer Al Viro <viro@zeniv.linux.org.uk>
Thu, 9 Aug 2018 21:21:17 +0000 (17:21 -0400)
diff --git a/fs/namespace.c b/fs/namespace.c
index 8ddd14806799db5d701ffd1eee41b650dfba3313..d46a951bd54112db96483180c097ad367cb08dd5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1195,12 +1195,22 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
  static void mntput_no_expire(struct mount *mnt)
  {
         rcu_read_lock();
-       mnt_add_count(mnt, -1);
-       if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+       if (likely(READ_ONCE(mnt->mnt_ns))) {
+               /*
+                * Since we don't do lock_mount_hash() here,
+                * ->mnt_ns can change under us.  However, if it's
+                * non-NULL, then there's a reference that won't
+                * be dropped until after an RCU delay done after
+                * turning ->mnt_ns NULL.  So if we observe it
+                * non-NULL under rcu_read_lock(), the reference
+                * we are dropping is not the final one.
+                */
+               mnt_add_count(mnt, -1);
                 rcu_read_unlock();
                 return;
         }
         lock_mount_hash();
+       mnt_add_count(mnt, -1);
         if (mnt_get_count(mnt)) {
                 rcu_read_unlock();
                 unlock_mount_hash();
author	Al Viro <viro@zeniv.linux.org.uk>
	Thu, 9 Aug 2018 21:21:17 +0000 (17:21 -0400)
committer	Al Viro <viro@zeniv.linux.org.uk>
	Thu, 9 Aug 2018 21:21:17 +0000 (17:21 -0400)