Merge branch 'for-linus' of git://git.kernel.dk/data/git/linux-2.6-block
[sfrench/cifs-2.6.git] / fs / splice.c
index 0a09732180847ce6cb7d67f91477c2314296003a..59a941d404d9c60b46a83ced4472ee915a7fe93c 100644 (file)
@@ -164,7 +164,7 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
  * @spd:       data to fill
  *
  * Description:
- *    @spd contains a map of pages and len/offset tupples, a long with
+ *    @spd contains a map of pages and len/offset tuples, along with
  *    the struct pipe_buf_operations associated with these pages. This
  *    function will link that data to the pipe.
  *
@@ -447,7 +447,7 @@ fill_it:
         */
        while (page_nr < nr_pages)
                page_cache_release(pages[page_nr++]);
-       in->f_ra.prev_index = index;
+       in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
 
        if (spd.nr_pages)
                return splice_to_pipe(pipe, &spd);
@@ -563,7 +563,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        struct address_space *mapping = file->f_mapping;
        unsigned int offset, this_len;
        struct page *page;
-       pgoff_t index;
+       void *fsdata;
        int ret;
 
        /*
@@ -573,49 +573,16 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        if (unlikely(ret))
                return ret;
 
-       index = sd->pos >> PAGE_CACHE_SHIFT;
        offset = sd->pos & ~PAGE_CACHE_MASK;
 
        this_len = sd->len;
        if (this_len + offset > PAGE_CACHE_SIZE)
                this_len = PAGE_CACHE_SIZE - offset;
 
-find_page:
-       page = find_lock_page(mapping, index);
-       if (!page) {
-               ret = -ENOMEM;
-               page = page_cache_alloc_cold(mapping);
-               if (unlikely(!page))
-                       goto out_ret;
-
-               /*
-                * This will also lock the page
-                */
-               ret = add_to_page_cache_lru(page, mapping, index,
-                                           GFP_KERNEL);
-               if (unlikely(ret))
-                       goto out_release;
-       }
-
-       ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
-       if (unlikely(ret)) {
-               loff_t isize = i_size_read(mapping->host);
-
-               if (ret != AOP_TRUNCATED_PAGE)
-                       unlock_page(page);
-               page_cache_release(page);
-               if (ret == AOP_TRUNCATED_PAGE)
-                       goto find_page;
-
-               /*
-                * prepare_write() may have instantiated a few blocks
-                * outside i_size.  Trim these off again.
-                */
-               if (sd->pos + this_len > isize)
-                       vmtruncate(mapping->host, isize);
-
-               goto out_ret;
-       }
+       ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
+                               AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
+       if (unlikely(ret))
+               goto out;
 
        if (buf->page != page) {
                /*
@@ -629,31 +596,9 @@ find_page:
                kunmap_atomic(dst, KM_USER1);
                buf->ops->unmap(pipe, buf, src);
        }
-
-       ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
-       if (ret) {
-               if (ret == AOP_TRUNCATED_PAGE) {
-                       page_cache_release(page);
-                       goto find_page;
-               }
-               if (ret < 0)
-                       goto out;
-               /*
-                * Partial write has happened, so 'ret' already initialized by
-                * number of bytes written, Where is nothing we have to do here.
-                */
-       } else
-               ret = this_len;
-       /*
-        * Return the number of bytes written and mark page as
-        * accessed, we are now done!
-        */
-       mark_page_accessed(page);
+       ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
+                               page, fsdata);
 out:
-       unlock_page(page);
-out_release:
-       page_cache_release(page);
-out_ret:
        return ret;
 }
 
@@ -1000,7 +945,7 @@ static long do_splice_to(struct file *in, loff_t *ppos,
  * Description:
  *    This is a special case helper to splice directly between two
  *    points, without requiring an explicit pipe. Internally an allocated
- *    pipe is cached in the process, and reused during the life time of
+ *    pipe is cached in the process, and reused during the lifetime of
  *    that process.
  *
  */
@@ -1223,6 +1168,33 @@ static long do_splice(struct file *in, loff_t __user *off_in,
        return -EINVAL;
 }
 
+/*
+ * Do a copy-from-user while holding the mmap_semaphore for reading, in a
+ * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem
+ * for writing) and page faulting on the user memory pointed to by src.
+ * This assumes that we will very rarely hit the partial != 0 path, or this
+ * will not be a win.
+ */
+static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n)
+{
+       int partial;
+
+       pagefault_disable();
+       partial = __copy_from_user_inatomic(dst, src, n);
+       pagefault_enable();
+
+       /*
+        * Didn't copy everything, drop the mmap_sem and do a faulting copy
+        */
+       if (unlikely(partial)) {
+               up_read(&current->mm->mmap_sem);
+               partial = copy_from_user(dst, src, n);
+               down_read(&current->mm->mmap_sem);
+       }
+
+       return partial;
+}
+
 /*
  * Map an iov into an array of pages and offset/length tupples. With the
  * partial_page structure, we can map several non-contiguous ranges into
@@ -1236,31 +1208,26 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 {
        int buffers = 0, error = 0;
 
-       /*
-        * It's ok to take the mmap_sem for reading, even
-        * across a "get_user()".
-        */
        down_read(&current->mm->mmap_sem);
 
        while (nr_vecs) {
                unsigned long off, npages;
+               struct iovec entry;
                void __user *base;
                size_t len;
                int i;
 
-               /*
-                * Get user address base and length for this iovec.
-                */
-               error = get_user(base, &iov->iov_base);
-               if (unlikely(error))
-                       break;
-               error = get_user(len, &iov->iov_len);
-               if (unlikely(error))
+               error = -EFAULT;
+               if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry)))
                        break;
 
+               base = entry.iov_base;
+               len = entry.iov_len;
+
                /*
                 * Sanity check this iovec. 0 read succeeds.
                 */
+               error = 0;
                if (unlikely(!len))
                        break;
                error = -EFAULT;
@@ -1368,10 +1335,10 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
                ret = -EFAULT;
 
+       buf->ops->unmap(pipe, buf, src);
 out:
        if (ret > 0)
                sd->u.userptr += ret;
-       buf->ops->unmap(pipe, buf, src);
        return ret;
 }