userfaultfd: change mmap_changing to atomic
Author:     Nadav Amit <namit@vmware.com>
AuthorDate: Thu, 2 Sep 2021 21:58:56 +0000 (14:58 -0700)
Commit:     Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Fri, 3 Sep 2021 16:58:16 +0000 (09:58 -0700)
Patch series "userfaultfd: minor bug fixes".

Three unrelated bug fixes. The first two address possible issues (not
merely theoretical ones), though I did not encounter them in practice.

The third patch addresses a test bug that causes the test to fail on my
system. It was previously sent as part of a larger RFC.

This patch (of 3):

mmap_changing is currently a boolean variable, which is set and cleared
without any lock that protects against concurrent modifications.

mmap_changing is supposed to mark whether userfaultfd page-fault handling
should be retried because mappings are undergoing a change.  However,
concurrent calls, for instance to madvise(MADV_DONTNEED), can leave
mmap_changing cleared (false) even though the remove event has not yet
been read (and hence acknowledged) by the user.
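
As an illustration, consider a minimal userspace sketch (not kernel
code; the concurrency is collapsed into straight-line steps, and the
flag/count names are made up) of how a boolean loses track of
overlapping remove events while a counter does not:

  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdio.h>

  static bool flag;          /* old scheme: bool mmap_changing */
  static atomic_int count;   /* new scheme: atomic_t mmap_changing */

  int main(void)
  {
      /* Two overlapping remove events begin. */
      flag = true;  atomic_fetch_add(&count, 1);
      flag = true;  atomic_fetch_add(&count, 1);

      /* Only the first event is read (acknowledged). */
      flag = false; atomic_fetch_sub(&count, 1);

      /*
       * The second event is still unread, yet the boolean claims the
       * mappings are stable; the counter still reports one pending
       * event.  Prints "flag: 0, count: 1".
       */
      printf("flag: %d, count: %d\n", flag, atomic_load(&count));
      return 0;
  }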

Change mmap_changing to atomic_t and increment/decrement it
appropriately.  Add a debug assertion to catch cases in which
mmap_changing becomes negative.
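
For context, the userspace contract is unchanged: while mmap_changing
is non-zero, UFFDIO_COPY and friends fail with -EAGAIN, and the manager
is expected to read (and thereby acknowledge) the pending event before
retrying.  A rough sketch of such a retry loop follows (error handling
trimmed; copy_with_retry is a hypothetical helper, and uffd is assumed
to be a blocking userfaultfd with non-cooperative events such as
UFFD_FEATURE_EVENT_REMOVE enabled):

  #include <linux/userfaultfd.h>
  #include <sys/ioctl.h>
  #include <unistd.h>
  #include <errno.h>

  static int copy_with_retry(int uffd, struct uffdio_copy *copy)
  {
      struct uffd_msg msg;

      for (;;) {
          if (ioctl(uffd, UFFDIO_COPY, copy) == 0)
              return 0;
          if (errno != EAGAIN)
              return -1;
          /* Mappings are changing: consume one event, then retry. */
          if (read(uffd, &msg, sizeof(msg)) != sizeof(msg))
              return -1;
          /* A real manager would act on msg.event here, e.g.
           * UFFD_EVENT_REMOVE or UFFD_EVENT_REMAP. */
      }
  }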

Link: https://lkml.kernel.org/r/20210808020724.1022515-1-namit@vmware.com
Link: https://lkml.kernel.org/r/20210808020724.1022515-2-namit@vmware.com
Fixes: df2cc96e77011 ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races")
Signed-off-by: Nadav Amit <namit@vmware.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/userfaultfd.c
include/linux/userfaultfd_k.h
mm/userfaultfd.c

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 5c2d806e6ae53f1c54ace2788e4b22f64a344f6e..29a3016f16c9c6d2c6bf764f050546ba124c56f6 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -74,7 +74,7 @@ struct userfaultfd_ctx {
        /* released */
        bool released;
        /* memory mappings are changing because of non-cooperative event */
-       bool mmap_changing;
+       atomic_t mmap_changing;
        /* mm with one ore more vmas attached to this userfaultfd_ctx */
        struct mm_struct *mm;
 };
@@ -623,7 +623,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
         * already released.
         */
 out:
-       WRITE_ONCE(ctx->mmap_changing, false);
+       atomic_dec(&ctx->mmap_changing);
+       VM_BUG_ON(atomic_read(&ctx->mmap_changing) < 0);
        userfaultfd_ctx_put(ctx);
 }
 
@@ -669,12 +670,12 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
                ctx->state = UFFD_STATE_RUNNING;
                ctx->features = octx->features;
                ctx->released = false;
-               ctx->mmap_changing = false;
+               atomic_set(&ctx->mmap_changing, 0);
                ctx->mm = vma->vm_mm;
                mmgrab(ctx->mm);
 
                userfaultfd_ctx_get(octx);
-               WRITE_ONCE(octx->mmap_changing, true);
+               atomic_inc(&octx->mmap_changing);
                fctx->orig = octx;
                fctx->new = ctx;
                list_add_tail(&fctx->list, fcs);
@@ -721,7 +722,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
        if (ctx->features & UFFD_FEATURE_EVENT_REMAP) {
                vm_ctx->ctx = ctx;
                userfaultfd_ctx_get(ctx);
-               WRITE_ONCE(ctx->mmap_changing, true);
+               atomic_inc(&ctx->mmap_changing);
        } else {
                /* Drop uffd context if remap feature not enabled */
                vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -766,7 +767,7 @@ bool userfaultfd_remove(struct vm_area_struct *vma,
                return true;
 
        userfaultfd_ctx_get(ctx);
-       WRITE_ONCE(ctx->mmap_changing, true);
+       atomic_inc(&ctx->mmap_changing);
        mmap_read_unlock(mm);
 
        msg_init(&ewq.msg);
@@ -810,7 +811,7 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma,
                        return -ENOMEM;
 
                userfaultfd_ctx_get(ctx);
-               WRITE_ONCE(ctx->mmap_changing, true);
+               atomic_inc(&ctx->mmap_changing);
                unmap_ctx->ctx = ctx;
                unmap_ctx->start = start;
                unmap_ctx->end = end;
@@ -1700,7 +1701,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
        user_uffdio_copy = (struct uffdio_copy __user *) arg;
 
        ret = -EAGAIN;
-       if (READ_ONCE(ctx->mmap_changing))
+       if (atomic_read(&ctx->mmap_changing))
                goto out;
 
        ret = -EFAULT;
@@ -1757,7 +1758,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
        user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg;
 
        ret = -EAGAIN;
-       if (READ_ONCE(ctx->mmap_changing))
+       if (atomic_read(&ctx->mmap_changing))
                goto out;
 
        ret = -EFAULT;
@@ -1807,7 +1808,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
        struct userfaultfd_wake_range range;
        bool mode_wp, mode_dontwake;
 
-       if (READ_ONCE(ctx->mmap_changing))
+       if (atomic_read(&ctx->mmap_changing))
                return -EAGAIN;
 
        user_uffdio_wp = (struct uffdio_writeprotect __user *) arg;
@@ -1855,7 +1856,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
        user_uffdio_continue = (struct uffdio_continue __user *)arg;
 
        ret = -EAGAIN;
-       if (READ_ONCE(ctx->mmap_changing))
+       if (atomic_read(&ctx->mmap_changing))
                goto out;
 
        ret = -EFAULT;
@@ -2087,7 +2088,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
        ctx->features = 0;
        ctx->state = UFFD_STATE_WAIT_API;
        ctx->released = false;
-       ctx->mmap_changing = false;
+       atomic_set(&ctx->mmap_changing, 0);
        ctx->mm = current->mm;
        /* prevent the mm struct to be freed */
        mmgrab(ctx->mm);
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 331d2ccf0bccb65960bb3a71c00dee53a6454441..33cea484d1ad7a1b41dd3383ab2fc8033b13762f 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -60,16 +60,16 @@ extern int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
                            unsigned long src_start, unsigned long len,
-                           bool *mmap_changing, __u64 mode);
+                           atomic_t *mmap_changing, __u64 mode);
 extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
                              unsigned long dst_start,
                              unsigned long len,
-                             bool *mmap_changing);
+                             atomic_t *mmap_changing);
 extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
-                             unsigned long len, bool *mmap_changing);
+                             unsigned long len, atomic_t *mmap_changing);
 extern int mwriteprotect_range(struct mm_struct *dst_mm,
                               unsigned long start, unsigned long len,
-                              bool enable_wp, bool *mmap_changing);
+                              bool enable_wp, atomic_t *mmap_changing);
 
 /* mm helpers */
 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 0e2132834bc7d60308a8a6d1783fb2b723c5608e..7a9008415534312b29ab5ad8aaa5e5ad03adc52b 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -483,7 +483,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
                                              unsigned long src_start,
                                              unsigned long len,
                                              enum mcopy_atomic_mode mcopy_mode,
-                                             bool *mmap_changing,
+                                             atomic_t *mmap_changing,
                                              __u64 mode)
 {
        struct vm_area_struct *dst_vma;
@@ -517,7 +517,7 @@ retry:
         * request the user to retry later
         */
        err = -EAGAIN;
-       if (mmap_changing && READ_ONCE(*mmap_changing))
+       if (mmap_changing && atomic_read(mmap_changing))
                goto out_unlock;
 
        /*
@@ -650,28 +650,29 @@ out:
 
 ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
                     unsigned long src_start, unsigned long len,
-                    bool *mmap_changing, __u64 mode)
+                    atomic_t *mmap_changing, __u64 mode)
 {
        return __mcopy_atomic(dst_mm, dst_start, src_start, len,
                              MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
 }
 
 ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
-                      unsigned long len, bool *mmap_changing)
+                      unsigned long len, atomic_t *mmap_changing)
 {
        return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
                              mmap_changing, 0);
 }
 
 ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
-                      unsigned long len, bool *mmap_changing)
+                      unsigned long len, atomic_t *mmap_changing)
 {
        return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
                              mmap_changing, 0);
 }
 
 int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
-                       unsigned long len, bool enable_wp, bool *mmap_changing)
+                       unsigned long len, bool enable_wp,
+                       atomic_t *mmap_changing)
 {
        struct vm_area_struct *dst_vma;
        pgprot_t newprot;
@@ -694,7 +695,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
         * request the user to retry later
         */
        err = -EAGAIN;
-       if (mmap_changing && READ_ONCE(*mmap_changing))
+       if (mmap_changing && atomic_read(mmap_changing))
                goto out_unlock;
 
        err = -ENOENT;