Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 796fe979f997f0ad65cf43567118e05798a8bcc5..62a9bb0efc55897412ba995d2cd830b1f8da6332 100644
@@ -565,6 +565,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
                              perm_to_iommu_flags(perm));
        }
 
+       if (r)
+               vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+
        return r;
 }
 
@@ -592,21 +595,19 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
        struct vhost_dev *dev = &v->vdev;
        struct vhost_iotlb *iotlb = dev->iotlb;
        struct page **page_list;
-       unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
+       struct vm_area_struct **vmas;
        unsigned int gup_flags = FOLL_LONGTERM;
-       unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-       unsigned long locked, lock_limit, pinned, i;
+       unsigned long map_pfn, last_pfn = 0;
+       unsigned long npages, lock_limit;
+       unsigned long i, nmap = 0;
        u64 iova = msg->iova;
+       long pinned;
        int ret = 0;
 
        if (vhost_iotlb_itree_first(iotlb, msg->iova,
                                    msg->iova + msg->size - 1))
                return -EEXIST;
 
-       page_list = (struct page **) __get_free_page(GFP_KERNEL);
-       if (!page_list)
-               return -ENOMEM;
-
        if (msg->perm & VHOST_ACCESS_WO)
                gup_flags |= FOLL_WRITE;
 
@@ -614,61 +615,86 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
        if (!npages)
                return -EINVAL;
 
+       page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
+       vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
+                             GFP_KERNEL);
+       if (!page_list || !vmas) {
+               ret = -ENOMEM;
+               goto free;
+       }
+
        mmap_read_lock(dev->mm);
 
-       locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-       if (locked > lock_limit) {
+       if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
                ret = -ENOMEM;
-               goto out;
+               goto unlock;
        }
 
-       cur_base = msg->uaddr & PAGE_MASK;
-       iova &= PAGE_MASK;
+       pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
+                               page_list, vmas);
+       if (npages != pinned) {
+               if (pinned < 0) {
+                       ret = pinned;
+               } else {
+                       unpin_user_pages(page_list, pinned);
+                       ret = -ENOMEM;
+               }
+               goto unlock;
+       }
 
-       while (npages) {
-               pinned = min_t(unsigned long, npages, list_size);
-               ret = pin_user_pages(cur_base, pinned,
-                                    gup_flags, page_list, NULL);
-               if (ret != pinned)
-                       goto out;
-
-               if (!last_pfn)
-                       map_pfn = page_to_pfn(page_list[0]);
-
-               for (i = 0; i < ret; i++) {
-                       unsigned long this_pfn = page_to_pfn(page_list[i]);
-                       u64 csize;
-
-                       if (last_pfn && (this_pfn != last_pfn + 1)) {
-                               /* Pin a contiguous chunk of memory */
-                               csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-                               if (vhost_vdpa_map(v, iova, csize,
-                                                  map_pfn << PAGE_SHIFT,
-                                                  msg->perm))
-                                       goto out;
-                               map_pfn = this_pfn;
-                               iova += csize;
+       iova &= PAGE_MASK;
+       map_pfn = page_to_pfn(page_list[0]);
+
+       /* One extra iteration avoids a trailing vdpa_map() call after the loop. */
+       for (i = 0; i <= npages; i++) {
+               unsigned long this_pfn;
+               u64 csize;
+
+               /* The last chunk may have no valid PFN next to it */
+               this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;
+
+               if (last_pfn && (this_pfn == -1UL ||
+                                this_pfn != last_pfn + 1)) {
+                       /* Map this contiguous chunk of pinned memory */
+                       csize = last_pfn - map_pfn + 1;
+                       ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
+                                            map_pfn << PAGE_SHIFT,
+                                            msg->perm);
+                       if (ret) {
+                               /*
+                                * Unpin the remaining chunks still in
+                                * flight, i.e. those with no matching
+                                * vdpa_map() call made for them yet.
+                                * Chunks already mapped are unpinned
+                                * and accounted for by vdpa_unmap()
+                                * in the failure path below. This
+                                * asymmetrical accounting is an
+                                * efficiency trade-off: all pages are
+                                * pinned in a single call, and this
+                                * is the only callsite of vdpa_map().
+                                */
+                               unpin_user_pages(&page_list[nmap],
+                                                npages - nmap);
+                               goto out;
                        }
-
-                       last_pfn = this_pfn;
+                       atomic64_add(csize, &dev->mm->pinned_vm);
+                       nmap += csize;
+                       iova += csize << PAGE_SHIFT;
+                       map_pfn = this_pfn;
                }
-
-               cur_base += ret << PAGE_SHIFT;
-               npages -= ret;
+               last_pfn = this_pfn;
        }
 
-       /* Pin the rest chunk */
-       ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
-                            map_pfn << PAGE_SHIFT, msg->perm);
+       WARN_ON(nmap != npages);
 out:
-       if (ret) {
+       if (ret)
                vhost_vdpa_unmap(v, msg->iova, msg->size);
-               atomic64_sub(npages, &dev->mm->pinned_vm);
-       }
+unlock:
        mmap_read_unlock(dev->mm);
-       free_page((unsigned long)page_list);
+free:
+       kvfree(vmas);
+       kvfree(page_list);
        return ret;
 }
 
@@ -810,6 +836,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 
 err_init_iotlb:
        vhost_dev_cleanup(&v->vdev);
+       kfree(vqs);
 err:
        atomic_dec(&v->opened);
        return r;
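
The core of the reworked vhost_vdpa_process_iotlb_update() above is a sentinel-terminated coalescing pass: all pages are pinned up front with a single pin_user_pages() call, then contiguous PFN runs are folded into one map call each, with one extra loop iteration (this_pfn = -1UL) so the final run is flushed without a trailing call after the loop. The plain-C sketch below reproduces only that loop shape for illustration; map_range(), coalesce_pfns() and the sample PFN array are made-up stand-ins, not kernel APIs.

/* Standalone illustration of the coalescing loop: walk a pinned PFN
 * array, fold contiguous runs into one "map" call each, and use an
 * extra sentinel iteration so the last run is emitted in-loop.
 */
#include <stdio.h>
#include <stddef.h>

#define PAGE_SHIFT 12

/* Stand-in for vhost_vdpa_map(): just report the run. */
static int map_range(unsigned long iova, unsigned long first_pfn,
		     unsigned long npfns)
{
	printf("map iova 0x%lx -> pfn 0x%lx, %lu page(s)\n",
	       iova, first_pfn, npfns);
	return 0;
}

static int coalesce_pfns(unsigned long iova, const unsigned long *pfns,
			 size_t npages)
{
	unsigned long map_pfn, last_pfn = 0;
	size_t i;

	if (!npages)		/* the kernel code rejects this earlier */
		return -1;

	map_pfn = pfns[0];

	/* One extra iteration: the sentinel (-1UL) never continues the
	 * previous run, so the final chunk is also flushed in-loop. */
	for (i = 0; i <= npages; i++) {
		unsigned long this_pfn = i < npages ? pfns[i] : -1UL;

		if (last_pfn && this_pfn != last_pfn + 1) {
			unsigned long csize = last_pfn - map_pfn + 1;
			int ret = map_range(iova, map_pfn, csize);

			if (ret)
				return ret;
			iova += csize << PAGE_SHIFT;
			map_pfn = this_pfn;
		}
		last_pfn = this_pfn;
	}
	return 0;
}

int main(void)
{
	/* Two contiguous runs: {0x100..0x102} and {0x200..0x201}. */
	const unsigned long pfns[] = { 0x100, 0x101, 0x102, 0x200, 0x201 };

	return coalesce_pfns(0x10000, pfns, sizeof(pfns) / sizeof(pfns[0]));
}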