Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi...
[sfrench/cifs-2.6.git] / fs / userfaultfd.c
index 18406158e13fbf5e9b4e7041489f2d20e404c075..973607df579db324e64da1c3c26999d60853fa80 100644 (file)
@@ -14,7 +14,8 @@
 
 #include <linux/list.h>
 #include <linux/hashtable.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
 #include <linux/mm.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
@@ -71,6 +72,13 @@ struct userfaultfd_fork_ctx {
        struct list_head list;
 };
 
+struct userfaultfd_unmap_ctx { /* one queued UFFD_EVENT_UNMAP notification */
+       struct userfaultfd_ctx *ctx; /* context to notify; ref taken when queued */
+       unsigned long start; /* range start copied into the event message */
+       unsigned long end; /* range end copied into the event message */
+       struct list_head list; /* link in the caller-provided pending list */
+};
+
 struct userfaultfd_wait_queue {
        struct uffd_msg msg;
        wait_queue_t wq;
@@ -681,16 +689,16 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
        userfaultfd_event_wait_completion(ctx, &ewq);
 }
 
-void madvise_userfault_dontneed(struct vm_area_struct *vma,
-                               struct vm_area_struct **prev,
-                               unsigned long start, unsigned long end)
+void userfaultfd_remove(struct vm_area_struct *vma,
+                       struct vm_area_struct **prev,
+                       unsigned long start, unsigned long end)
 {
        struct mm_struct *mm = vma->vm_mm;
        struct userfaultfd_ctx *ctx;
        struct userfaultfd_wait_queue ewq;
 
        ctx = vma->vm_userfaultfd_ctx.ctx;
-       if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_MADVDONTNEED))
+       if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
                return;
 
        userfaultfd_ctx_get(ctx);
@@ -700,15 +708,101 @@ void madvise_userfault_dontneed(struct vm_area_struct *vma,
 
        msg_init(&ewq.msg);
 
-       ewq.msg.event = UFFD_EVENT_MADVDONTNEED;
-       ewq.msg.arg.madv_dn.start = start;
-       ewq.msg.arg.madv_dn.end = end;
+       ewq.msg.event = UFFD_EVENT_REMOVE;
+       ewq.msg.arg.remove.start = start;
+       ewq.msg.arg.remove.end = end;
 
        userfaultfd_event_wait_completion(ctx, &ewq);
 
        down_read(&mm->mmap_sem);
 }
 
+/*
+ * Tell whether @unmaps already contains an entry for @ctx carrying the
+ * same @start and @end, so that one context is not queued twice for the
+ * same range.
+ *
+ * Returns true when such an entry exists, false otherwise.
+ */
+static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
+                         unsigned long start, unsigned long end)
+{
+       struct userfaultfd_unmap_ctx *entry;
+
+       list_for_each_entry(entry, unmaps, list) {
+               if (entry->ctx != ctx)
+                       continue;
+               if (entry->start != start || entry->end != end)
+                       continue;
+
+               return true;
+       }
+
+       return false;
+}
+
+/*
+ * Walk the vma list starting at @vma for as long as a vma begins below
+ * @end, and queue one userfaultfd_unmap_ctx on @unmaps for every distinct
+ * userfaultfd context that has UFFD_FEATURE_EVENT_UNMAP enabled.  Each
+ * queued entry records @start and @end as passed in, not the bounds of
+ * the individual vma.
+ *
+ * Returns 0 on success or -ENOMEM if an entry cannot be allocated; entries
+ * queued before the failure are left on @unmaps.
+ */
+int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+                          unsigned long start, unsigned long end,
+                          struct list_head *unmaps)
+{
+       struct userfaultfd_unmap_ctx *entry;
+       struct userfaultfd_ctx *uffd;
+
+       for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
+               uffd = vma->vm_userfaultfd_ctx.ctx;
+
+               /* vma is not registered with any userfaultfd */
+               if (!uffd)
+                       continue;
+
+               /* the context did not ask for unmap events */
+               if (!(uffd->features & UFFD_FEATURE_EVENT_UNMAP))
+                       continue;
+
+               /* this context is already queued for this exact range */
+               if (has_unmap_ctx(uffd, unmaps, start, end))
+                       continue;
+
+               entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+               if (!entry)
+                       return -ENOMEM;
+
+               /* take a reference on behalf of the queued entry */
+               userfaultfd_ctx_get(uffd);
+               entry->ctx = uffd;
+               entry->start = start;
+               entry->end = end;
+               list_add_tail(&entry->list, unmaps);
+       }
+
+       return 0;
+}
+
+/*
+ * Drain the list @uf: for every queued entry build a UFFD_EVENT_UNMAP
+ * message carrying the entry's range, hand it to
+ * userfaultfd_event_wait_completion() for the entry's context, then
+ * unlink and free the entry.
+ *
+ * @mm is not referenced by this function.
+ */
+void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
+{
+       struct userfaultfd_unmap_ctx *entry, *next;
+       struct userfaultfd_wait_queue ewq;
+
+       /* _safe variant: the current entry is freed inside the loop */
+       list_for_each_entry_safe(entry, next, uf, list) {
+               msg_init(&ewq.msg);
+               ewq.msg.event = UFFD_EVENT_UNMAP;
+
+               /* the unmap event stores its range in the "remove" payload */
+               ewq.msg.arg.remove.start = entry->start;
+               ewq.msg.arg.remove.end = entry->end;
+
+               userfaultfd_event_wait_completion(entry->ctx, &ewq);
+
+               list_del(&entry->list);
+               kfree(entry);
+       }
+}
+
+void userfaultfd_exit(struct mm_struct *mm)
+{
+       struct vm_area_struct *vma = mm->mmap;
+
+       /*
+        * Walk every vma of @mm and, for each attached userfaultfd context
+        * that has UFFD_FEATURE_EVENT_EXIT set, pass a UFFD_EVENT_EXIT
+        * message to userfaultfd_event_wait_completion().  The feature bit
+        * is cleared afterwards, so a context shared by several vmas is
+        * only notified once during the walk.
+        *
+        * We can do the vma walk without locking because the caller
+        * (exit_mm) knows it now has exclusive access.
+        *
+        * NOTE(review): no userfaultfd_ctx_put() is visible in this
+        * function, so the reference taken by userfaultfd_ctx_get() is
+        * presumably dropped inside userfaultfd_event_wait_completion() -
+        * confirm.  If that is the case, the ctx->features update below
+        * runs after that reference has been released; verify that the
+        * context cannot go away before it.
+        */
+       while (vma) {
+               struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+
+               if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) {
+                       struct userfaultfd_wait_queue ewq;
+
+                       userfaultfd_ctx_get(ctx);
+
+                       msg_init(&ewq.msg);
+                       ewq.msg.event = UFFD_EVENT_EXIT;
+
+                       userfaultfd_event_wait_completion(ctx, &ewq);
+
+                       ctx->features &= ~UFFD_FEATURE_EVENT_EXIT;
+               }
+
+               vma = vma->vm_next;
+       }
+}
+
 static int userfaultfd_release(struct inode *inode, struct file *file)
 {
        struct userfaultfd_ctx *ctx = file->private_data;
@@ -1514,6 +1608,8 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
                ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
                                   uffdio_copy.len);
                mmput(ctx->mm);
+       } else {
+               return -ENOSPC;
        }
        if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
                return -EFAULT;
@@ -1712,17 +1808,17 @@ static void init_once_userfaultfd_ctx(void *mem)
 }
 
 /**
- * userfaultfd_file_create - Creates an userfaultfd file pointer.
+ * userfaultfd_file_create - Creates a userfaultfd file pointer.
  * @flags: Flags for the userfaultfd file.
  *
- * This function creates an userfaultfd file pointer, w/out installing
+ * This function creates a userfaultfd file pointer, w/out installing
  * it into the fd table. This is useful when the userfaultfd file is
  * used during the initialization of data structures that require
  * extra setup after the userfaultfd creation. So the userfaultfd
  * creation is split into the file pointer creation phase, and the
  * file descriptor installation phase.  In this way races with
  * userspace closing the newly installed file descriptor can be
- * avoided.  Returns an userfaultfd file pointer, or a proper error
+ * avoided.  Returns a userfaultfd file pointer, or a proper error
  * pointer.
  */
 static struct file *userfaultfd_file_create(int flags)
@@ -1752,7 +1848,7 @@ static struct file *userfaultfd_file_create(int flags)
        ctx->released = false;
        ctx->mm = current->mm;
        /* prevent the mm struct to be freed */
-       atomic_inc(&ctx->mm->mm_count);
+       mmgrab(ctx->mm);
 
        file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
                                  O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));