Merge branch 'splice-2.6.23' of git://git.kernel.dk/data/git/linux-2.6-block
authorLinus Torvalds <torvalds@woody.linux-foundation.org>
Tue, 10 Jul 2007 20:51:06 +0000 (13:51 -0700)
committerLinus Torvalds <torvalds@woody.linux-foundation.org>
Tue, 10 Jul 2007 20:51:06 +0000 (13:51 -0700)
* 'splice-2.6.23' of git://git.kernel.dk/data/git/linux-2.6-block:
  pipe: add documentation and comments
  pipe: change the ->pin() operation to ->confirm()
  Remove remnants of sendfile()
  xip sendfile removal
  splice: completely document external interface with kerneldoc
  sendfile: remove bad_sendfile() from bad_file_ops
  shmem: convert to using splice instead of sendfile()
  relay: use splice_to_pipe() instead of open-coding the pipe loop
  pipe: allow passing around of ops private pointer
  splice: divorce the splice structure/function definitions from the pipe header
  splice: relay support
  sendfile: convert nfsd to splice_direct_to_actor()
  sendfile: convert nfs to using splice_read()
  loop: convert to using splice_direct_to_actor() instead of sendfile()
  splice: add void cookie to the actor data
  sendfile: kill generic_file_sendfile()
  sendfile: remove .sendfile from filesystems that use generic_file_sendfile()
  sys_sendfile: switch to using ->splice_read, if available
  vmsplice: add vmsplice-to-user support
  splice: abstract out actor data

56 files changed:
Documentation/DocBook/kernel-api.tmpl
drivers/block/loop.c
drivers/char/mem.c
fs/adfs/file.c
fs/affs/file.c
fs/afs/file.c
fs/bad_inode.c
fs/bfs/file.c
fs/block_dev.c
fs/cifs/cifsfs.c
fs/coda/file.c
fs/ecryptfs/file.c
fs/ext2/file.c
fs/ext3/file.c
fs/ext4/file.c
fs/fat/file.c
fs/fuse/file.c
fs/gfs2/ops_file.c
fs/hfs/inode.c
fs/hfsplus/inode.c
fs/hostfs/hostfs_kern.c
fs/hpfs/file.c
fs/jffs2/file.c
fs/jfs/file.c
fs/minix/file.c
fs/nfs/file.c
fs/nfsd/vfs.c
fs/ntfs/file.c
fs/ocfs2/file.c
fs/pipe.c
fs/qnx4/file.c
fs/ramfs/file-mmu.c
fs/ramfs/file-nommu.c
fs/read_write.c
fs/reiserfs/file.c
fs/smbfs/file.c
fs/splice.c
fs/sysv/file.c
fs/udf/file.c
fs/ufs/file.c
fs/xfs/linux-2.6/xfs_file.c
fs/xfs/linux-2.6/xfs_linux.h
fs/xfs/linux-2.6/xfs_lrw.c
fs/xfs/linux-2.6/xfs_lrw.h
fs/xfs/linux-2.6/xfs_vnode.h
fs/xfs/xfs_vnodeops.c
include/linux/fs.h
include/linux/pipe_fs_i.h
include/linux/splice.h [new file with mode: 0644]
include/linux/sunrpc/svc.h
kernel/relay.c
mm/filemap.c
mm/filemap_xip.c
mm/shmem.c
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/svc.c

index 38f88b6ae405a16606134db8aedf7a89ccf8e643..8c5698a8c2e1e071060511353ce457da215f0e5e 100644 (file)
@@ -643,4 +643,15 @@ X!Idrivers/video/console/fonts.c
 !Edrivers/spi/spi.c
   </chapter>
 
+  <chapter id="splice">
+      <title>splice API</title>
+  <para>)
+       splice is a method for moving blocks of data around inside the
+       kernel, without continually transferring it between the kernel
+       and user space.
+  </para>
+!Iinclude/linux/splice.h
+!Ffs/splice.c
+  </chapter>
+
 </book>
index 0ed5470d25339674a5c449feab66f5505db5f3c2..4503290da4078c2916bafdc8af301fac333f5349 100644 (file)
@@ -74,6 +74,7 @@
 #include <linux/highmem.h>
 #include <linux/gfp.h>
 #include <linux/kthread.h>
+#include <linux/splice.h>
 
 #include <asm/uaccess.h>
 
@@ -401,50 +402,73 @@ struct lo_read_data {
 };
 
 static int
-lo_read_actor(read_descriptor_t *desc, struct page *page,
-             unsigned long offset, unsigned long size)
+lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+               struct splice_desc *sd)
 {
-       unsigned long count = desc->count;
-       struct lo_read_data *p = desc->arg.data;
+       struct lo_read_data *p = sd->u.data;
        struct loop_device *lo = p->lo;
+       struct page *page = buf->page;
        sector_t IV;
+       size_t size;
+       int ret;
 
-       IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
+       ret = buf->ops->confirm(pipe, buf);
+       if (unlikely(ret))
+               return ret;
 
-       if (size > count)
-               size = count;
+       IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
+                                                       (buf->offset >> 9);
+       size = sd->len;
+       if (size > p->bsize)
+               size = p->bsize;
 
-       if (lo_do_transfer(lo, READ, page, offset, p->page, p->offset, size, IV)) {
-               size = 0;
+       if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
                printk(KERN_ERR "loop: transfer error block %ld\n",
                       page->index);
-               desc->error = -EINVAL;
+               size = -EINVAL;
        }
 
        flush_dcache_page(p->page);
 
-       desc->count = count - size;
-       desc->written += size;
-       p->offset += size;
+       if (size > 0)
+               p->offset += size;
+
        return size;
 }
 
+static int
+lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
+{
+       return __splice_from_pipe(pipe, sd, lo_splice_actor);
+}
+
 static int
 do_lo_receive(struct loop_device *lo,
              struct bio_vec *bvec, int bsize, loff_t pos)
 {
        struct lo_read_data cookie;
+       struct splice_desc sd;
        struct file *file;
-       int retval;
+       long retval;
 
        cookie.lo = lo;
        cookie.page = bvec->bv_page;
        cookie.offset = bvec->bv_offset;
        cookie.bsize = bsize;
+
+       sd.len = 0;
+       sd.total_len = bvec->bv_len;
+       sd.flags = 0;
+       sd.pos = pos;
+       sd.u.data = &cookie;
+
        file = lo->lo_backing_file;
-       retval = file->f_op->sendfile(file, &pos, bvec->bv_len,
-                       lo_read_actor, &cookie);
-       return (retval < 0)? retval: 0;
+       retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
+
+       if (retval < 0)
+               return retval;
+
+       return 0;
 }
 
 static int
@@ -679,8 +703,8 @@ static int loop_change_fd(struct loop_device *lo, struct file *lo_file,
        if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
                goto out_putf;
 
-       /* new backing store needs to support loop (eg sendfile) */
-       if (!inode->i_fop->sendfile)
+       /* new backing store needs to support loop (eg splice_read) */
+       if (!inode->i_fop->splice_read)
                goto out_putf;
 
        /* size of the new backing store needs to be the same */
@@ -760,7 +784,7 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
                 * If we can't read - sorry. If we only can't write - well,
                 * it's going to be read-only.
                 */
-               if (!file->f_op->sendfile)
+               if (!file->f_op->splice_read)
                        goto out_putf;
                if (aops->prepare_write && aops->commit_write)
                        lo_flags |= LO_FLAGS_USE_AOPS;
index cc9a9d0df979f5dcf69b4e70d4f7945aae68a490..d2e4cfd79f27daf0001dcff37164bba0a901404c 100644 (file)
@@ -24,7 +24,7 @@
 #include <linux/crash_dump.h>
 #include <linux/backing-dev.h>
 #include <linux/bootmem.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
 #include <linux/pfn.h>
 
 #include <asm/uaccess.h>
index f544a285592343d4ac4cb9d1455a2c4e602cc2b9..36e381c6a99a69a761973cf3e61c38f226e00ee3 100644 (file)
@@ -33,7 +33,7 @@ const struct file_operations adfs_file_operations = {
        .fsync          = file_fsync,
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
 };
 
 const struct inode_operations adfs_file_inode_operations = {
index c8796906f584bdb2bffe406e186b4f17e6aa5c75..c314a35f09187bac060460a2fe948b4434f36c71 100644 (file)
@@ -35,7 +35,7 @@ const struct file_operations affs_file_operations = {
        .open           = affs_file_open,
        .release        = affs_file_release,
        .fsync          = file_fsync,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
 };
 
 const struct inode_operations affs_file_inode_operations = {
index 9c0e721d9fc219fff078507b879b92b7ae41b1fb..aede7eb66dd4f970a74021b41487a5a0a70c0616 100644 (file)
@@ -32,7 +32,7 @@ const struct file_operations afs_file_operations = {
        .aio_read       = generic_file_aio_read,
        .aio_write      = afs_file_write,
        .mmap           = generic_file_readonly_mmap,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
        .fsync          = afs_fsync,
 };
 
index 329ee473eede9514d2b6511aa9d41fc3e3884beb..521ff7caadbd687ada5ed347edc696d70168392f 100644 (file)
@@ -114,12 +114,6 @@ static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
        return -EIO;
 }
 
-static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
-                       size_t count, read_actor_t actor, void *target)
-{
-       return -EIO;
-}
-
 static ssize_t bad_file_sendpage(struct file *file, struct page *page,
                        int off, size_t len, loff_t *pos, int more)
 {
@@ -182,7 +176,6 @@ static const struct file_operations bad_file_ops =
        .aio_fsync      = bad_file_aio_fsync,
        .fasync         = bad_file_fasync,
        .lock           = bad_file_lock,
-       .sendfile       = bad_file_sendfile,
        .sendpage       = bad_file_sendpage,
        .get_unmapped_area = bad_file_get_unmapped_area,
        .check_flags    = bad_file_check_flags,
index ef4d1fa04e654f3f792840cff4db34ab82b9a1f0..24310e9ee05ad343245c0d9e881f150006c8e69f 100644 (file)
@@ -24,7 +24,7 @@ const struct file_operations bfs_file_operations = {
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
 };
 
 static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb)
index ea1480a16f517d85d12de02bfde8afa53a43037c..b3e9bfa748cf99971567913492428524aa847e2b 100644 (file)
@@ -1346,7 +1346,6 @@ const struct file_operations def_blk_fops = {
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = compat_blkdev_ioctl,
 #endif
-       .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
 };
index 7c04752b76cb88f1678e3f76cd4a2642a4a1a929..8b0cbf4a4ad0aa33dfecf6100afeb139151e5b03 100644 (file)
@@ -616,7 +616,7 @@ const struct file_operations cifs_file_ops = {
        .fsync = cifs_fsync,
        .flush = cifs_flush,
        .mmap  = cifs_file_mmap,
-       .sendfile = generic_file_sendfile,
+       .splice_read = generic_file_splice_read,
        .llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
@@ -637,7 +637,7 @@ const struct file_operations cifs_file_direct_ops = {
        .lock = cifs_lock,
        .fsync = cifs_fsync,
        .flush = cifs_flush,
-       .sendfile = generic_file_sendfile, /* BB removeme BB */
+       .splice_read = generic_file_splice_read,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
 #endif /* CONFIG_CIFS_POSIX */
@@ -656,7 +656,7 @@ const struct file_operations cifs_file_nobrl_ops = {
        .fsync = cifs_fsync,
        .flush = cifs_flush,
        .mmap  = cifs_file_mmap,
-       .sendfile = generic_file_sendfile,
+       .splice_read = generic_file_splice_read,
        .llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
@@ -676,7 +676,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
        .release = cifs_close,
        .fsync = cifs_fsync,
        .flush = cifs_flush,
-       .sendfile = generic_file_sendfile, /* BB removeme BB */
+       .splice_read = generic_file_splice_read,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
 #endif /* CONFIG_CIFS_POSIX */
index 5ef2b609ec7dd3164f3f70bfac44a281105f2691..99dbe866816d277ba5c90ccbf04ff92567725bee 100644 (file)
@@ -47,8 +47,9 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
 }
 
 static ssize_t
-coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
-                  read_actor_t actor, void *target)
+coda_file_splice_read(struct file *coda_file, loff_t *ppos,
+                     struct pipe_inode_info *pipe, size_t count,
+                     unsigned int flags)
 {
        struct coda_file_info *cfi;
        struct file *host_file;
@@ -57,10 +58,10 @@ coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
        BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
        host_file = cfi->cfi_container;
 
-       if (!host_file->f_op || !host_file->f_op->sendfile)
+       if (!host_file->f_op || !host_file->f_op->splice_read)
                return -EINVAL;
 
-       return host_file->f_op->sendfile(host_file, ppos, count, actor, target);
+       return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
 }
 
 static ssize_t
@@ -295,6 +296,6 @@ const struct file_operations coda_file_operations = {
        .flush          = coda_flush,
        .release        = coda_release,
        .fsync          = coda_fsync,
-       .sendfile       = coda_file_sendfile,
+       .splice_read    = coda_file_splice_read,
 };
 
index 59288d817078879ad36c1ece17a51208488d571e..94f456fe4d9b70487686a707b9954d49895a3fb4 100644 (file)
@@ -338,16 +338,17 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
        return rc;
 }
 
-static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos,
-                                size_t count, read_actor_t actor, void *target)
+static ssize_t ecryptfs_splice_read(struct file *file, loff_t * ppos,
+                                   struct pipe_inode_info *pipe, size_t count,
+                                   unsigned int flags)
 {
        struct file *lower_file = NULL;
        int rc = -EINVAL;
 
        lower_file = ecryptfs_file_to_lower(file);
-       if (lower_file->f_op && lower_file->f_op->sendfile)
-               rc = lower_file->f_op->sendfile(lower_file, ppos, count,
-                                               actor, target);
+       if (lower_file->f_op && lower_file->f_op->splice_read)
+               rc = lower_file->f_op->splice_read(lower_file, ppos, pipe,
+                                               count, flags);
 
        return rc;
 }
@@ -364,7 +365,7 @@ const struct file_operations ecryptfs_dir_fops = {
        .release = ecryptfs_release,
        .fsync = ecryptfs_fsync,
        .fasync = ecryptfs_fasync,
-       .sendfile = ecryptfs_sendfile,
+       .splice_read = ecryptfs_splice_read,
 };
 
 const struct file_operations ecryptfs_main_fops = {
@@ -381,7 +382,7 @@ const struct file_operations ecryptfs_main_fops = {
        .release = ecryptfs_release,
        .fsync = ecryptfs_fsync,
        .fasync = ecryptfs_fasync,
-       .sendfile = ecryptfs_sendfile,
+       .splice_read = ecryptfs_splice_read,
 };
 
 static int
index 566d4e2d3852353d4e54032c417b6bd593d72fa2..04afeecaaef31acde466b50f48ea6057ac3f98a9 100644 (file)
@@ -53,7 +53,6 @@ const struct file_operations ext2_file_operations = {
        .open           = generic_file_open,
        .release        = ext2_release_file,
        .fsync          = ext2_sync_file,
-       .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
 };
@@ -71,7 +70,6 @@ const struct file_operations ext2_xip_file_operations = {
        .open           = generic_file_open,
        .release        = ext2_release_file,
        .fsync          = ext2_sync_file,
-       .sendfile       = xip_file_sendfile,
 };
 #endif
 
index 1e6f13864536e7180a465ace2ce6555256d7292f..acc4913d30199079007726e8c4a49b4a07dd77cc 100644 (file)
@@ -120,7 +120,6 @@ const struct file_operations ext3_file_operations = {
        .open           = generic_file_open,
        .release        = ext3_release_file,
        .fsync          = ext3_sync_file,
-       .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
 };
index 3c6c1fd2be902524eb1901776aa8a0b45db4a167..d4c8186aed646f6a4439b17d0b0cda9655aadf55 100644 (file)
@@ -120,7 +120,6 @@ const struct file_operations ext4_file_operations = {
        .open           = generic_file_open,
        .release        = ext4_release_file,
        .fsync          = ext4_sync_file,
-       .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
 };
index 55d3c7461c5b9acb50b851fae64df4a68dcd3d01..69a83b59dce80bdc84f1d4507e1ad74195c38690 100644 (file)
@@ -134,7 +134,7 @@ const struct file_operations fat_file_operations = {
        .release        = fat_file_release,
        .ioctl          = fat_generic_ioctl,
        .fsync          = file_fsync,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
 };
 
 static int fat_cont_expand(struct inode *inode, loff_t size)
index adf7995232b8b851250ed83839fdd5436c85c143..f79de7c8cdfaae3260009c86127f88ae4283cc22 100644 (file)
@@ -802,7 +802,7 @@ static const struct file_operations fuse_file_operations = {
        .release        = fuse_release,
        .fsync          = fuse_fsync,
        .lock           = fuse_file_lock,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
 };
 
 static const struct file_operations fuse_direct_io_file_operations = {
@@ -814,7 +814,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
        .release        = fuse_release,
        .fsync          = fuse_fsync,
        .lock           = fuse_file_lock,
-       /* no mmap and sendfile */
+       /* no mmap and splice_read */
 };
 
 static const struct address_space_operations fuse_file_aops  = {
index 064df8804582b1f99b9003a78fb2ff39b2a2c254..7dc3be10820412f338846f3c08e4408113b9506b 100644 (file)
@@ -635,7 +635,6 @@ const struct file_operations gfs2_file_fops = {
        .release        = gfs2_close,
        .fsync          = gfs2_fsync,
        .lock           = gfs2_lock,
-       .sendfile       = generic_file_sendfile,
        .flock          = gfs2_flock,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
index 9a934db0bd8ae927509a9c168ff1ee07cc167562..bc835f272a6e0a9e017aac7517e672a4ea0db5bb 100644 (file)
@@ -607,7 +607,7 @@ static const struct file_operations hfs_file_operations = {
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
        .fsync          = file_fsync,
        .open           = hfs_file_open,
        .release        = hfs_file_release,
index 45dab5d6cc10a25f7e37a80d847566f6c4001a66..409ce5429c91ccb8b53d5a9fff6ef0118546722d 100644 (file)
@@ -288,7 +288,7 @@ static const struct file_operations hfsplus_file_operations = {
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
        .fsync          = file_fsync,
        .open           = hfsplus_file_open,
        .release        = hfsplus_file_release,
index 8286491dbf31dbccca606b241ae755d5cd77cee8..c77862032e844262a199549d01ca16489b0578f8 100644 (file)
@@ -390,7 +390,7 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 static const struct file_operations hostfs_file_fops = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
        .aio_read       = generic_file_aio_read,
        .aio_write      = generic_file_aio_write,
        .write          = do_sync_write,
index b4eafc0f1e54b40007b9831a6661012cfaa0ebe0..5b53e5c5d8dfb4cbdd4808efa6fa945a4ec4a51f 100644 (file)
@@ -129,7 +129,7 @@ const struct file_operations hpfs_file_ops =
        .mmap           = generic_file_mmap,
        .release        = hpfs_file_release,
        .fsync          = hpfs_file_fsync,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
 };
 
 const struct inode_operations hpfs_file_iops =
index 99871279a1edf167aa6abd63c9c1d948a56bb4e3..c2530197be0c8d6db52a3fcf2ada23ca33196a02 100644 (file)
@@ -47,7 +47,7 @@ const struct file_operations jffs2_file_operations =
        .ioctl =        jffs2_ioctl,
        .mmap =         generic_file_readonly_mmap,
        .fsync =        jffs2_fsync,
-       .sendfile =     generic_file_sendfile
+       .splice_read =  generic_file_splice_read,
 };
 
 /* jffs2_file_inode_operations */
index f7f8eff19b7b2c4358fc58053b827fb5becd8d37..87eb93694af75832c792f1863c3edbe13d6c5c56 100644 (file)
@@ -108,7 +108,6 @@ const struct file_operations jfs_file_operations = {
        .aio_read       = generic_file_aio_read,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
-       .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
        .fsync          = jfs_fsync,
index f92baa1d757053cd7f1b3964bf73cafcd74f7047..17765f697e50f68f7a29906b8f4aef36709abb29 100644 (file)
@@ -23,7 +23,7 @@ const struct file_operations minix_file_operations = {
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .fsync          = minix_sync_file,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
 };
 
 const struct inode_operations minix_file_inode_operations = {
index 9eb8eb4e4a08df9db62c8301d8c4622df45bf10c..8689b736fdd98cfa8b648f92e9922c52499a6fb3 100644 (file)
@@ -41,7 +41,9 @@ static int nfs_file_open(struct inode *, struct file *);
 static int nfs_file_release(struct inode *, struct file *);
 static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
 static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
-static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
+static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
+                                       struct pipe_inode_info *pipe,
+                                       size_t count, unsigned int flags);
 static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
                                unsigned long nr_segs, loff_t pos);
 static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
@@ -65,7 +67,7 @@ const struct file_operations nfs_file_operations = {
        .fsync          = nfs_fsync,
        .lock           = nfs_lock,
        .flock          = nfs_flock,
-       .sendfile       = nfs_file_sendfile,
+       .splice_read    = nfs_file_splice_read,
        .check_flags    = nfs_check_flags,
 };
 
@@ -224,20 +226,21 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 }
 
 static ssize_t
-nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
-               read_actor_t actor, void *target)
+nfs_file_splice_read(struct file *filp, loff_t *ppos,
+                    struct pipe_inode_info *pipe, size_t count,
+                    unsigned int flags)
 {
        struct dentry *dentry = filp->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        ssize_t res;
 
-       dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n",
+       dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name,
                (unsigned long) count, (unsigned long long) *ppos);
 
        res = nfs_revalidate_mapping(inode, filp->f_mapping);
        if (!res)
-               res = generic_file_sendfile(filp, ppos, count, actor, target);
+               res = generic_file_splice_read(filp, ppos, pipe, count, flags);
        return res;
 }
 
index 7e6aa245b5d5f30b270c5336267eb77760be41ac..8604e35bd48e2d5966a244f1763f62def303dc0a 100644 (file)
@@ -23,7 +23,7 @@
 #include <linux/file.h>
 #include <linux/mount.h>
 #include <linux/major.h>
-#include <linux/ext2_fs.h>
+#include <linux/splice.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
@@ -801,26 +801,32 @@ found:
 }
 
 /*
- * Grab and keep cached pages assosiated with a file in the svc_rqst
- * so that they can be passed to the netowork sendmsg/sendpage routines
- * directrly. They will be released after the sending has completed.
+ * Grab and keep cached pages associated with a file in the svc_rqst
+ * so that they can be passed to the network sendmsg/sendpage routines
+ * directly. They will be released after the sending has completed.
  */
 static int
-nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size)
+nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+                 struct splice_desc *sd)
 {
-       unsigned long count = desc->count;
-       struct svc_rqst *rqstp = desc->arg.data;
+       struct svc_rqst *rqstp = sd->u.data;
        struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
+       struct page *page = buf->page;
+       size_t size;
+       int ret;
+
+       ret = buf->ops->confirm(pipe, buf);
+       if (unlikely(ret))
+               return ret;
 
-       if (size > count)
-               size = count;
+       size = sd->len;
 
        if (rqstp->rq_res.page_len == 0) {
                get_page(page);
                put_page(*pp);
                *pp = page;
                rqstp->rq_resused++;
-               rqstp->rq_res.page_base = offset;
+               rqstp->rq_res.page_base = buf->offset;
                rqstp->rq_res.page_len = size;
        } else if (page != pp[-1]) {
                get_page(page);
@@ -832,11 +838,15 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
        } else
                rqstp->rq_res.page_len += size;
 
-       desc->count = count - size;
-       desc->written += size;
        return size;
 }
 
+static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
+                                   struct splice_desc *sd)
+{
+       return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
+}
+
 static __be32
 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
               loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
@@ -861,10 +871,15 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        if (ra && ra->p_set)
                file->f_ra = ra->p_ra;
 
-       if (file->f_op->sendfile && rqstp->rq_sendfile_ok) {
-               rqstp->rq_resused = 1;
-               host_err = file->f_op->sendfile(file, &offset, *count,
-                                                nfsd_read_actor, rqstp);
+       if (file->f_op->splice_read && rqstp->rq_splice_ok) {
+               struct splice_desc sd = {
+                       .len            = 0,
+                       .total_len      = *count,
+                       .pos            = offset,
+                       .u.data         = rqstp,
+               };
+
+               host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
        } else {
                oldfs = get_fs();
                set_fs(KERNEL_DS);
index 7ed56390b5826798f3d67bded0781b7382fe25b3..ffcc504a1667be04daea41339c53cd16d7ca84e4 100644 (file)
@@ -2276,7 +2276,7 @@ const struct file_operations ntfs_file_ops = {
                                                    mounted filesystem. */
        .mmap           = generic_file_mmap,     /* Mmap file. */
        .open           = ntfs_file_open,        /* Open file. */
-       .sendfile       = generic_file_sendfile, /* Zero-copy data send with
+       .splice_read    = generic_file_splice_read /* Zero-copy data send with
                                                    the data source being on
                                                    the ntfs partition.  We do
                                                    not need to care about the
index ac6c96431bbcb3a7687b522465e456d68246c002..4979b667571734c151a88b2a03f01db5e2b8c761 100644 (file)
@@ -31,7 +31,7 @@
 #include <linux/pagemap.h>
 #include <linux/uio.h>
 #include <linux/sched.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
 #include <linux/mount.h>
 #include <linux/writeback.h>
 
@@ -1583,7 +1583,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
        ssize_t copied = 0;
        struct ocfs2_splice_write_priv sp;
 
-       ret = buf->ops->pin(pipe, buf);
+       ret = buf->ops->confirm(pipe, buf);
        if (ret)
                goto out;
 
@@ -1604,7 +1604,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
                 * might enter ocfs2_buffered_write_cluster() more
                 * than once, so keep track of our progress here.
                 */
-               copied = ocfs2_buffered_write_cluster(sd->file,
+               copied = ocfs2_buffered_write_cluster(sd->u.file,
                                                      (loff_t)sd->pos + total,
                                                      count,
                                                      ocfs2_map_and_write_splice_data,
@@ -1636,9 +1636,14 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
        int ret, err;
        struct address_space *mapping = out->f_mapping;
        struct inode *inode = mapping->host;
-
-       ret = __splice_from_pipe(pipe, out, ppos, len, flags,
-                                ocfs2_splice_write_actor);
+       struct splice_desc sd = {
+               .total_len = len,
+               .flags = flags,
+               .pos = *ppos,
+               .u.file = out,
+       };
+
+       ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
        if (ret > 0) {
                *ppos += ret;
 
@@ -1817,7 +1822,6 @@ const struct inode_operations ocfs2_special_file_iops = {
 const struct file_operations ocfs2_fops = {
        .read           = do_sync_read,
        .write          = do_sync_write,
-       .sendfile       = generic_file_sendfile,
        .mmap           = ocfs2_mmap,
        .fsync          = ocfs2_sync_file,
        .release        = ocfs2_file_release,
index 3a89592bdf577930b4c1698c4811261aa467cbf2..d007830d9c870fabb4b887d3f81afcb8e7160642 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -164,6 +164,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
                page_cache_release(page);
 }
 
+/**
+ * generic_pipe_buf_map - virtually map a pipe buffer
+ * @pipe:      the pipe that the buffer belongs to
+ * @buf:       the buffer that should be mapped
+ * @atomic:    whether to use an atomic map
+ *
+ * Description:
+ *     This function returns a kernel virtual address mapping for the
+ *     passed in @pipe_buffer. If @atomic is set, an atomic map is provided
+ *     and the caller has to be careful not to fault before calling
+ *     the unmap function.
+ *
+ *     Note that this function occupies KM_USER0 if @atomic != 0.
+ */
 void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
                           struct pipe_buffer *buf, int atomic)
 {
@@ -175,6 +189,15 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
        return kmap(buf->page);
 }
 
+/**
+ * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
+ * @pipe:      the pipe that the buffer belongs to
+ * @buf:       the buffer that should be unmapped
+ * @map_data:  the data that the mapping function returned
+ *
+ * Description:
+ *     This function undoes the mapping that ->map() provided.
+ */
 void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
                            struct pipe_buffer *buf, void *map_data)
 {
@@ -185,11 +208,28 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
                kunmap(buf->page);
 }
 
+/**
+ * generic_pipe_buf_steal - attempt to take ownership of a @pipe_buffer
+ * @pipe:      the pipe that the buffer belongs to
+ * @buf:       the buffer to attempt to steal
+ *
+ * Description:
+ *     This function attempts to steal the @struct page attached to
+ *     @buf. If successful, this function returns 0 and returns with
+ *     the page locked. The caller may then reuse the page for whatever
+ *     he wishes, the typical use is insertion into a different file
+ *     page cache.
+ */
 int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
                           struct pipe_buffer *buf)
 {
        struct page *page = buf->page;
 
+       /*
+        * A reference of one is golden, that means that the owner of this
+        * page is the only one holding a reference to it. lock the page
+        * and return OK.
+        */
        if (page_count(page) == 1) {
                lock_page(page);
                return 0;
@@ -198,12 +238,32 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
        return 1;
 }
 
-void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
+/**
+ * generic_pipe_buf_get - get a reference to a @struct pipe_buffer
+ * @pipe:      the pipe that the buffer belongs to
+ * @buf:       the buffer to get a reference to
+ *
+ * Description:
+ *     This function grabs an extra reference to @buf. It's used in
+ *     in the tee() system call, when we duplicate the buffers in one
+ *     pipe into another.
+ */
+void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
 {
        page_cache_get(buf->page);
 }
 
-int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
+/**
+ * generic_pipe_buf_confirm - verify contents of the pipe buffer
+ * @pipe:      the pipe that the buffer belongs to
+ * @buf:       the buffer to confirm
+ *
+ * Description:
+ *     This function does nothing, because the generic pipe code uses
+ *     pages that are always good when inserted into the pipe.
+ */
+int generic_pipe_buf_confirm(struct pipe_inode_info *info,
+                            struct pipe_buffer *buf)
 {
        return 0;
 }
@@ -212,7 +272,7 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
        .can_merge = 1,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
-       .pin = generic_pipe_buf_pin,
+       .confirm = generic_pipe_buf_confirm,
        .release = anon_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
        .get = generic_pipe_buf_get,
@@ -252,7 +312,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
                        if (chars > total_len)
                                chars = total_len;
 
-                       error = ops->pin(pipe, buf);
+                       error = ops->confirm(pipe, buf);
                        if (error) {
                                if (!ret)
                                        error = ret;
@@ -373,7 +433,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
                        int error, atomic = 1;
                        void *addr;
 
-                       error = ops->pin(pipe, buf);
+                       error = ops->confirm(pipe, buf);
                        if (error)
                                goto out;
 
index 44649981bbc8b08b7c1830d903432bb3fb583701..867f42b02035dd6621f681a6730d90c2a3a3c4ae 100644 (file)
@@ -25,7 +25,7 @@ const struct file_operations qnx4_file_operations =
        .read           = do_sync_read,
        .aio_read       = generic_file_aio_read,
        .mmap           = generic_file_mmap,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
 #ifdef CONFIG_QNX4FS_RW
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
index 2f14774a124fc05e5c995f1c9dd02926440de009..97bdc0b2f9d29b0d253048874dfaa388ae3ec830 100644 (file)
@@ -41,7 +41,7 @@ const struct file_operations ramfs_file_operations = {
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .fsync          = simple_sync_file,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
        .llseek         = generic_file_llseek,
 };
 
index 5d258c40a2fd9c59fead95c4d3d84d7a4092bce3..cad2b7ace63033bc4aa0e78393e07d3527f296c9 100644 (file)
@@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = {
        .write                  = do_sync_write,
        .aio_write              = generic_file_aio_write,
        .fsync                  = simple_sync_file,
-       .sendfile               = generic_file_sendfile,
+       .splice_read            = generic_file_splice_read,
        .llseek                 = generic_file_llseek,
 };
 
index 4d03008f015b9ae6d2565cdbc179d034687c3d08..507ddff48a9a9e673d75d161d0fc8ad09c00faef 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/pagemap.h>
+#include <linux/splice.h>
 #include "read_write.h"
 
 #include <asm/uaccess.h>
@@ -25,7 +26,7 @@ const struct file_operations generic_ro_fops = {
        .read           = do_sync_read,
        .aio_read       = generic_file_aio_read,
        .mmap           = generic_file_readonly_mmap,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
 };
 
 EXPORT_SYMBOL(generic_ro_fops);
@@ -708,7 +709,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
        struct inode * in_inode, * out_inode;
        loff_t pos;
        ssize_t retval;
-       int fput_needed_in, fput_needed_out;
+       int fput_needed_in, fput_needed_out, fl;
 
        /*
         * Get input file, and verify that it is ok..
@@ -723,7 +724,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
        in_inode = in_file->f_path.dentry->d_inode;
        if (!in_inode)
                goto fput_in;
-       if (!in_file->f_op || !in_file->f_op->sendfile)
+       if (!in_file->f_op || !in_file->f_op->splice_read)
                goto fput_in;
        retval = -ESPIPE;
        if (!ppos)
@@ -776,7 +777,18 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
                count = max - pos;
        }
 
-       retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
+       fl = 0;
+#if 0
+       /*
+        * We need to debate whether we can enable this or not. The
+        * man page documents EAGAIN return for the output at least,
+        * and the application is arguably buggy if it doesn't expect
+        * EAGAIN on a non-blocking file descriptor.
+        */
+       if (in_file->f_flags & O_NONBLOCK)
+               fl = SPLICE_F_NONBLOCK;
+#endif
+       retval = do_splice_direct(in_file, ppos, out_file, count, fl);
 
        if (retval > 0) {
                add_rchar(current, retval);
index 9e451a68580f6eef3ab4ba8c6eb729ae843d071b..30eebfb1b2d8d7e9cf26474329d28fd28f401665 100644 (file)
@@ -1531,7 +1531,6 @@ const struct file_operations reiserfs_file_operations = {
        .open = generic_file_open,
        .release = reiserfs_file_release,
        .fsync = reiserfs_sync_file,
-       .sendfile = generic_file_sendfile,
        .aio_read = generic_file_aio_read,
        .aio_write = generic_file_aio_write,
        .splice_read = generic_file_splice_read,
index aea3f8aa54c0911e80110937f9d26ef03694a9ee..c5d78a7e492b40b7dae9dde99b42698522485565 100644 (file)
@@ -262,8 +262,9 @@ out:
 }
 
 static ssize_t
-smb_file_sendfile(struct file *file, loff_t *ppos,
-                 size_t count, read_actor_t actor, void *target)
+smb_file_splice_read(struct file *file, loff_t *ppos,
+                    struct pipe_inode_info *pipe, size_t count,
+                    unsigned int flags)
 {
        struct dentry *dentry = file->f_path.dentry;
        ssize_t status;
@@ -277,7 +278,7 @@ smb_file_sendfile(struct file *file, loff_t *ppos,
                         DENTRY_PATH(dentry), status);
                goto out;
        }
-       status = generic_file_sendfile(file, ppos, count, actor, target);
+       status = generic_file_splice_read(file, ppos, pipe, count, flags);
 out:
        return status;
 }
@@ -416,7 +417,7 @@ const struct file_operations smb_file_operations =
        .open           = smb_file_open,
        .release        = smb_file_release,
        .fsync          = smb_fsync,
-       .sendfile       = smb_file_sendfile,
+       .splice_read    = smb_file_splice_read,
 };
 
 const struct inode_operations smb_file_inode_operations =
index e7d7080de2f9799860a475a4df50a7742561d09f..ed2ce995475c3f3cf6991de68cab9b38285ddc5d 100644 (file)
@@ -20,7 +20,7 @@
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/pagemap.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
 #include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/syscalls.h>
 #include <linux/uio.h>
 
-struct partial_page {
-       unsigned int offset;
-       unsigned int len;
-};
-
-/*
- * Passed to splice_to_pipe
- */
-struct splice_pipe_desc {
-       struct page **pages;            /* page map */
-       struct partial_page *partial;   /* pages[] may not be contig */
-       int nr_pages;                   /* number of pages in map */
-       unsigned int flags;             /* splice flags */
-       const struct pipe_buf_operations *ops;/* ops associated with output pipe */
-};
-
 /*
  * Attempt to steal a page from a pipe buffer. This should perhaps go into
  * a vm helper function, it's already simplified quite a bit by the
@@ -101,8 +85,12 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
        buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
-static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe,
-                                  struct pipe_buffer *buf)
+/*
+ * Check whether the contents of buf is OK to access. Since the content
+ * is a page cache page, IO may be in flight.
+ */
+static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
+                                      struct pipe_buffer *buf)
 {
        struct page *page = buf->page;
        int err;
@@ -143,7 +131,7 @@ static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
        .can_merge = 0,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
-       .pin = page_cache_pipe_buf_pin,
+       .confirm = page_cache_pipe_buf_confirm,
        .release = page_cache_pipe_buf_release,
        .steal = page_cache_pipe_buf_steal,
        .get = generic_pipe_buf_get,
@@ -163,18 +151,25 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
        .can_merge = 0,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
-       .pin = generic_pipe_buf_pin,
+       .confirm = generic_pipe_buf_confirm,
        .release = page_cache_pipe_buf_release,
        .steal = user_page_pipe_buf_steal,
        .get = generic_pipe_buf_get,
 };
 
-/*
- * Pipe output worker. This sets up our pipe format with the page cache
- * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
+/**
+ * splice_to_pipe - fill passed data into a pipe
+ * @pipe:      pipe to fill
+ * @spd:       data to fill
+ *
+ * Description:
+ *    @spd contains a map of pages and len/offset tupples, a long with
+ *    the struct pipe_buf_operations associated with these pages. This
+ *    function will link that data to the pipe.
+ *
  */
-static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
-                             struct splice_pipe_desc *spd)
+ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
+                      struct splice_pipe_desc *spd)
 {
        unsigned int spd_pages = spd->nr_pages;
        int ret, do_wakeup, page_nr;
@@ -201,6 +196,7 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
                        buf->page = spd->pages[page_nr];
                        buf->offset = spd->partial[page_nr].offset;
                        buf->len = spd->partial[page_nr].len;
+                       buf->private = spd->partial[page_nr].private;
                        buf->ops = spd->ops;
                        if (spd->flags & SPLICE_F_GIFT)
                                buf->flags |= PIPE_BUF_FLAG_GIFT;
@@ -295,11 +291,6 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
         */
        page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
 
-       /*
-        * Now fill in the holes:
-        */
-       error = 0;
-
        /*
         * Lookup the (hopefully) full range of pages we need.
         */
@@ -307,8 +298,9 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 
        /*
         * If find_get_pages_contig() returned fewer pages than we needed,
-        * allocate the rest.
+        * allocate the rest and fill in the holes.
         */
+       error = 0;
        index += spd.nr_pages;
        while (spd.nr_pages < nr_pages) {
                /*
@@ -470,11 +462,16 @@ fill_it:
 /**
  * generic_file_splice_read - splice data from file to a pipe
  * @in:                file to splice from
+ * @ppos:      position in @in
  * @pipe:      pipe to splice to
  * @len:       number of bytes to splice
  * @flags:     splice modifier flags
  *
- * Will read pages from given file and fill them into a pipe.
+ * Description:
+ *    Will read pages from given file and fill them into a pipe. Can be
+ *    used as long as the address_space operations for the source implements
+ *    a readpage() hook.
+ *
  */
 ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
                                 struct pipe_inode_info *pipe, size_t len,
@@ -528,11 +525,11 @@ EXPORT_SYMBOL(generic_file_splice_read);
 static int pipe_to_sendpage(struct pipe_inode_info *pipe,
                            struct pipe_buffer *buf, struct splice_desc *sd)
 {
-       struct file *file = sd->file;
+       struct file *file = sd->u.file;
        loff_t pos = sd->pos;
        int ret, more;
 
-       ret = buf->ops->pin(pipe, buf);
+       ret = buf->ops->confirm(pipe, buf);
        if (!ret) {
                more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
 
@@ -566,7 +563,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                        struct splice_desc *sd)
 {
-       struct file *file = sd->file;
+       struct file *file = sd->u.file;
        struct address_space *mapping = file->f_mapping;
        unsigned int offset, this_len;
        struct page *page;
@@ -576,7 +573,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        /*
         * make sure the data in this buffer is uptodate
         */
-       ret = buf->ops->pin(pipe, buf);
+       ret = buf->ops->confirm(pipe, buf);
        if (unlikely(ret))
                return ret;
 
@@ -663,36 +660,37 @@ out_ret:
        return ret;
 }
 
-/*
- * Pipe input worker. Most of this logic works like a regular pipe, the
- * key here is the 'actor' worker passed in that actually moves the data
- * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
+/**
+ * __splice_from_pipe - splice data from a pipe to given actor
+ * @pipe:      pipe to splice from
+ * @sd:                information to @actor
+ * @actor:     handler that splices the data
+ *
+ * Description:
+ *    This function does little more than loop over the pipe and call
+ *    @actor to do the actual moving of a single struct pipe_buffer to
+ *    the desired destination. See pipe_to_file, pipe_to_sendpage, or
+ *    pipe_to_user.
+ *
  */
-ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
-                          struct file *out, loff_t *ppos, size_t len,
-                          unsigned int flags, splice_actor *actor)
+ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
+                          splice_actor *actor)
 {
        int ret, do_wakeup, err;
-       struct splice_desc sd;
 
        ret = 0;
        do_wakeup = 0;
 
-       sd.total_len = len;
-       sd.flags = flags;
-       sd.file = out;
-       sd.pos = *ppos;
-
        for (;;) {
                if (pipe->nrbufs) {
                        struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
                        const struct pipe_buf_operations *ops = buf->ops;
 
-                       sd.len = buf->len;
-                       if (sd.len > sd.total_len)
-                               sd.len = sd.total_len;
+                       sd->len = buf->len;
+                       if (sd->len > sd->total_len)
+                               sd->len = sd->total_len;
 
-                       err = actor(pipe, buf, &sd);
+                       err = actor(pipe, buf, sd);
                        if (err <= 0) {
                                if (!ret && err != -ENODATA)
                                        ret = err;
@@ -704,10 +702,10 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
                        buf->offset += err;
                        buf->len -= err;
 
-                       sd.len -= err;
-                       sd.pos += err;
-                       sd.total_len -= err;
-                       if (sd.len)
+                       sd->len -= err;
+                       sd->pos += err;
+                       sd->total_len -= err;
+                       if (sd->len)
                                continue;
 
                        if (!buf->len) {
@@ -719,7 +717,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
                                        do_wakeup = 1;
                        }
 
-                       if (!sd.total_len)
+                       if (!sd->total_len)
                                break;
                }
 
@@ -732,7 +730,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
                                break;
                }
 
-               if (flags & SPLICE_F_NONBLOCK) {
+               if (sd->flags & SPLICE_F_NONBLOCK) {
                        if (!ret)
                                ret = -EAGAIN;
                        break;
@@ -766,12 +764,32 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
 }
 EXPORT_SYMBOL(__splice_from_pipe);
 
+/**
+ * splice_from_pipe - splice data from a pipe to a file
+ * @pipe:      pipe to splice from
+ * @out:       file to splice to
+ * @ppos:      position in @out
+ * @len:       how many bytes to splice
+ * @flags:     splice modifier flags
+ * @actor:     handler that splices the data
+ *
+ * Description:
+ *    See __splice_from_pipe. This function locks the input and output inodes,
+ *    otherwise it's identical to __splice_from_pipe().
+ *
+ */
 ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
                         loff_t *ppos, size_t len, unsigned int flags,
                         splice_actor *actor)
 {
        ssize_t ret;
        struct inode *inode = out->f_mapping->host;
+       struct splice_desc sd = {
+               .total_len = len,
+               .flags = flags,
+               .pos = *ppos,
+               .u.file = out,
+       };
 
        /*
         * The actor worker might be calling ->prepare_write and
@@ -780,7 +798,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
         * pipe->inode, we have to order lock acquiry here.
         */
        inode_double_lock(inode, pipe->inode);
-       ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor);
+       ret = __splice_from_pipe(pipe, &sd, actor);
        inode_double_unlock(inode, pipe->inode);
 
        return ret;
@@ -790,12 +808,14 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
  * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
  * @pipe:      pipe info
  * @out:       file to write to
+ * @ppos:      position in @out
  * @len:       number of bytes to splice
  * @flags:     splice modifier flags
  *
- * Will either move or copy pages (determined by @flags options) from
- * the given pipe inode to the given file. The caller is responsible
- * for acquiring i_mutex on both inodes.
+ * Description:
+ *    Will either move or copy pages (determined by @flags options) from
+ *    the given pipe inode to the given file. The caller is responsible
+ *    for acquiring i_mutex on both inodes.
  *
  */
 ssize_t
@@ -804,6 +824,12 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
 {
        struct address_space *mapping = out->f_mapping;
        struct inode *inode = mapping->host;
+       struct splice_desc sd = {
+               .total_len = len,
+               .flags = flags,
+               .pos = *ppos,
+               .u.file = out,
+       };
        ssize_t ret;
        int err;
 
@@ -811,7 +837,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
        if (unlikely(err))
                return err;
 
-       ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
+       ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
        if (ret > 0) {
                unsigned long nr_pages;
 
@@ -841,11 +867,13 @@ EXPORT_SYMBOL(generic_file_splice_write_nolock);
  * generic_file_splice_write - splice data from a pipe to a file
  * @pipe:      pipe info
  * @out:       file to write to
+ * @ppos:      position in @out
  * @len:       number of bytes to splice
  * @flags:     splice modifier flags
  *
- * Will either move or copy pages (determined by @flags options) from
- * the given pipe inode to the given file.
+ * Description:
+ *    Will either move or copy pages (determined by @flags options) from
+ *    the given pipe inode to the given file.
  *
  */
 ssize_t
@@ -896,13 +924,15 @@ EXPORT_SYMBOL(generic_file_splice_write);
 
 /**
  * generic_splice_sendpage - splice data from a pipe to a socket
- * @inode:     pipe inode
+ * @pipe:      pipe to splice from
  * @out:       socket to write to
+ * @ppos:      position in @out
  * @len:       number of bytes to splice
  * @flags:     splice modifier flags
  *
- * Will send @len bytes from the pipe to a network socket. No data copying
- * is involved.
+ * Description:
+ *    Will send @len bytes from the pipe to a network socket. No data copying
+ *    is involved.
  *
  */
 ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
@@ -956,14 +986,27 @@ static long do_splice_to(struct file *in, loff_t *ppos,
        return in->f_op->splice_read(in, ppos, pipe, len, flags);
 }
 
-long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
-                     size_t len, unsigned int flags)
+/**
+ * splice_direct_to_actor - splices data directly between two non-pipes
+ * @in:                file to splice from
+ * @sd:                actor information on where to splice to
+ * @actor:     handles the data splicing
+ *
+ * Description:
+ *    This is a special case helper to splice directly between two
+ *    points, without requiring an explicit pipe. Internally an allocated
+ *    pipe is cached in the process, and reused during the life time of
+ *    that process.
+ *
+ */
+ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
+                              splice_direct_actor *actor)
 {
        struct pipe_inode_info *pipe;
        long ret, bytes;
-       loff_t out_off;
        umode_t i_mode;
-       int i;
+       size_t len;
+       int i, flags;
 
        /*
         * We require the input being a regular file, as we don't want to
@@ -999,7 +1042,13 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
         */
        ret = 0;
        bytes = 0;
-       out_off = 0;
+       len = sd->total_len;
+       flags = sd->flags;
+
+       /*
+        * Don't block on output, we have to drain the direct pipe.
+        */
+       sd->flags &= ~SPLICE_F_NONBLOCK;
 
        while (len) {
                size_t read_len, max_read_len;
@@ -1009,19 +1058,19 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
                 */
                max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
 
-               ret = do_splice_to(in, ppos, pipe, max_read_len, flags);
+               ret = do_splice_to(in, &sd->pos, pipe, max_read_len, flags);
                if (unlikely(ret < 0))
                        goto out_release;
 
                read_len = ret;
+               sd->total_len = read_len;
 
                /*
                 * NOTE: nonblocking mode only applies to the input. We
                 * must not do the output in nonblocking mode as then we
                 * could get stuck data in the internal pipe:
                 */
-               ret = do_splice_from(pipe, out, &out_off, read_len,
-                                    flags & ~SPLICE_F_NONBLOCK);
+               ret = actor(pipe, sd);
                if (unlikely(ret < 0))
                        goto out_release;
 
@@ -1066,6 +1115,48 @@ out_release:
                return bytes;
 
        return ret;
+
+}
+EXPORT_SYMBOL(splice_direct_to_actor);
+
+static int direct_splice_actor(struct pipe_inode_info *pipe,
+                              struct splice_desc *sd)
+{
+       struct file *file = sd->u.file;
+
+       return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
+}
+
+/**
+ * do_splice_direct - splices data directly between two files
+ * @in:                file to splice from
+ * @ppos:      input file offset
+ * @out:       file to splice to
+ * @len:       number of bytes to splice
+ * @flags:     splice modifier flags
+ *
+ * Description:
+ *    For use by do_sendfile(). splice can easily emulate sendfile, but
+ *    doing it in the application would incur an extra system call
+ *    (splice in + splice out, as compared to just sendfile()). So this helper
+ *    can splice directly through a process-private pipe.
+ *
+ */
+long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+                     size_t len, unsigned int flags)
+{
+       struct splice_desc sd = {
+               .len            = len,
+               .total_len      = len,
+               .flags          = flags,
+               .pos            = *ppos,
+               .u.file         = out,
+       };
+       size_t ret;
+
+       ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
+       *ppos = sd.pos;
+       return ret;
 }
 
 /*
@@ -1248,28 +1339,131 @@ static int get_iovec_page_array(const struct iovec __user *iov,
        return error;
 }
 
+static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+                       struct splice_desc *sd)
+{
+       char *src;
+       int ret;
+
+       ret = buf->ops->confirm(pipe, buf);
+       if (unlikely(ret))
+               return ret;
+
+       /*
+        * See if we can use the atomic maps, by prefaulting in the
+        * pages and doing an atomic copy
+        */
+       if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
+               src = buf->ops->map(pipe, buf, 1);
+               ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
+                                                       sd->len);
+               buf->ops->unmap(pipe, buf, src);
+               if (!ret) {
+                       ret = sd->len;
+                       goto out;
+               }
+       }
+
+       /*
+        * No dice, use slow non-atomic map and copy
+        */
+       src = buf->ops->map(pipe, buf, 0);
+
+       ret = sd->len;
+       if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
+               ret = -EFAULT;
+
+out:
+       if (ret > 0)
+               sd->u.userptr += ret;
+       buf->ops->unmap(pipe, buf, src);
+       return ret;
+}
+
+/*
+ * For lack of a better implementation, implement vmsplice() to userspace
+ * as a simple copy of the pipes pages to the user iov.
+ */
+static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
+                            unsigned long nr_segs, unsigned int flags)
+{
+       struct pipe_inode_info *pipe;
+       struct splice_desc sd;
+       ssize_t size;
+       int error;
+       long ret;
+
+       pipe = pipe_info(file->f_path.dentry->d_inode);
+       if (!pipe)
+               return -EBADF;
+
+       if (pipe->inode)
+               mutex_lock(&pipe->inode->i_mutex);
+
+       error = ret = 0;
+       while (nr_segs) {
+               void __user *base;
+               size_t len;
+
+               /*
+                * Get user address base and length for this iovec.
+                */
+               error = get_user(base, &iov->iov_base);
+               if (unlikely(error))
+                       break;
+               error = get_user(len, &iov->iov_len);
+               if (unlikely(error))
+                       break;
+
+               /*
+                * Sanity check this iovec. 0 read succeeds.
+                */
+               if (unlikely(!len))
+                       break;
+               if (unlikely(!base)) {
+                       error = -EFAULT;
+                       break;
+               }
+
+               sd.len = 0;
+               sd.total_len = len;
+               sd.flags = flags;
+               sd.u.userptr = base;
+               sd.pos = 0;
+
+               size = __splice_from_pipe(pipe, &sd, pipe_to_user);
+               if (size < 0) {
+                       if (!ret)
+                               ret = size;
+
+                       break;
+               }
+
+               ret += size;
+
+               if (size < len)
+                       break;
+
+               nr_segs--;
+               iov++;
+       }
+
+       if (pipe->inode)
+               mutex_unlock(&pipe->inode->i_mutex);
+
+       if (!ret)
+               ret = error;
+
+       return ret;
+}
+
 /*
  * vmsplice splices a user address range into a pipe. It can be thought of
  * as splice-from-memory, where the regular splice is splice-from-file (or
  * to file). In both cases the output is a pipe, naturally.
- *
- * Note that vmsplice only supports splicing _from_ user memory to a pipe,
- * not the other way around. Splicing from user memory is a simple operation
- * that can be supported without any funky alignment restrictions or nasty
- * vm tricks. We simply map in the user memory and fill them into a pipe.
- * The reverse isn't quite as easy, though. There are two possible solutions
- * for that:
- *
- *     - memcpy() the data internally, at which point we might as well just
- *       do a regular read() on the buffer anyway.
- *     - Lots of nasty vm tricks, that are neither fast nor flexible (it
- *       has restriction limitations on both ends of the pipe).
- *
- * Alas, it isn't here.
- *
  */
-static long do_vmsplice(struct file *file, const struct iovec __user *iov,
-                       unsigned long nr_segs, unsigned int flags)
+static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
+                            unsigned long nr_segs, unsigned int flags)
 {
        struct pipe_inode_info *pipe;
        struct page *pages[PIPE_BUFFERS];
@@ -1284,10 +1478,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
        pipe = pipe_info(file->f_path.dentry->d_inode);
        if (!pipe)
                return -EBADF;
-       if (unlikely(nr_segs > UIO_MAXIOV))
-               return -EINVAL;
-       else if (unlikely(!nr_segs))
-               return 0;
 
        spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
                                            flags & SPLICE_F_GIFT);
@@ -1297,6 +1487,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
        return splice_to_pipe(pipe, &spd);
 }
 
+/*
+ * Note that vmsplice only really supports true splicing _from_ user memory
+ * to a pipe, not the other way around. Splicing from user memory is a simple
+ * operation that can be supported without any funky alignment restrictions
+ * or nasty vm tricks. We simply map in the user memory and fill them into
+ * a pipe. The reverse isn't quite as easy, though. There are two possible
+ * solutions for that:
+ *
+ *     - memcpy() the data internally, at which point we might as well just
+ *       do a regular read() on the buffer anyway.
+ *     - Lots of nasty vm tricks, that are neither fast nor flexible (it
+ *       has restriction limitations on both ends of the pipe).
+ *
+ * Currently we punt and implement it as a normal copy, see pipe_to_user().
+ *
+ */
 asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
                             unsigned long nr_segs, unsigned int flags)
 {
@@ -1304,11 +1510,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
        long error;
        int fput;
 
+       if (unlikely(nr_segs > UIO_MAXIOV))
+               return -EINVAL;
+       else if (unlikely(!nr_segs))
+               return 0;
+
        error = -EBADF;
        file = fget_light(fd, &fput);
        if (file) {
                if (file->f_mode & FMODE_WRITE)
-                       error = do_vmsplice(file, iov, nr_segs, flags);
+                       error = vmsplice_to_pipe(file, iov, nr_segs, flags);
+               else if (file->f_mode & FMODE_READ)
+                       error = vmsplice_to_user(file, iov, nr_segs, flags);
 
                fput_light(file, fput);
        }
index 0732ddb9020beddba8e0c8caf0a71a43ed3c7eb8..589be21d884e362d17433f4e55999d73b537203a 100644 (file)
@@ -27,7 +27,7 @@ const struct file_operations sysv_file_operations = {
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .fsync          = sysv_sync_file,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
 };
 
 const struct inode_operations sysv_file_inode_operations = {
index 51b5764685e777f5b3489c9c6e83aac898233559..df070bee8d4f83f3a6966c6fe2485541e217accc 100644 (file)
@@ -261,7 +261,7 @@ const struct file_operations udf_file_operations = {
        .aio_write              = udf_file_aio_write,
        .release                = udf_release_file,
        .fsync                  = udf_fsync_file,
-       .sendfile               = generic_file_sendfile,
+       .splice_read            = generic_file_splice_read,
 };
 
 const struct inode_operations udf_file_inode_operations = {
index 1e096323bad47f240bdbc30191922d46638a5612..6705d74c6d2d9b836dcdd3d3be1d40b65b3bf8e1 100644 (file)
@@ -60,5 +60,5 @@ const struct file_operations ufs_file_operations = {
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
        .fsync          = ufs_sync_file,
-       .sendfile       = generic_file_sendfile,
+       .splice_read    = generic_file_splice_read,
 };
index cb51dc9613555b6c8a37160b0d45d727359235bd..8c43cd2e237a5f15b18c743e2c13a10fd8c0f51f 100644 (file)
@@ -123,30 +123,6 @@ xfs_file_aio_write_invis(
        return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
 }
 
-STATIC ssize_t
-xfs_file_sendfile(
-       struct file             *filp,
-       loff_t                  *pos,
-       size_t                  count,
-       read_actor_t            actor,
-       void                    *target)
-{
-       return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
-                               filp, pos, 0, count, actor, target, NULL);
-}
-
-STATIC ssize_t
-xfs_file_sendfile_invis(
-       struct file             *filp,
-       loff_t                  *pos,
-       size_t                  count,
-       read_actor_t            actor,
-       void                    *target)
-{
-       return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
-                               filp, pos, IO_INVIS, count, actor, target, NULL);
-}
-
 STATIC ssize_t
 xfs_file_splice_read(
        struct file             *infilp,
@@ -452,7 +428,6 @@ const struct file_operations xfs_file_operations = {
        .write          = do_sync_write,
        .aio_read       = xfs_file_aio_read,
        .aio_write      = xfs_file_aio_write,
-       .sendfile       = xfs_file_sendfile,
        .splice_read    = xfs_file_splice_read,
        .splice_write   = xfs_file_splice_write,
        .unlocked_ioctl = xfs_file_ioctl,
@@ -475,7 +450,6 @@ const struct file_operations xfs_invis_file_operations = {
        .write          = do_sync_write,
        .aio_read       = xfs_file_aio_read_invis,
        .aio_write      = xfs_file_aio_write_invis,
-       .sendfile       = xfs_file_sendfile_invis,
        .splice_read    = xfs_file_splice_read_invis,
        .splice_write   = xfs_file_splice_write_invis,
        .unlocked_ioctl = xfs_file_ioctl_invis,
index 715adad7dd4dd07607edda8aae8e30bb31697899..af24a457d3a33efebe55719cb3dd1bc0e755fe5d 100644 (file)
  * Feature macros (disable/enable)
  */
 #undef  HAVE_REFCACHE  /* reference cache not needed for NFS in 2.6 */
-#define HAVE_SENDFILE  /* sendfile(2) exists in 2.6, but not in 2.4 */
 #define HAVE_SPLICE    /* a splice(2) exists in 2.6, but not in 2.4 */
 #ifdef CONFIG_SMP
 #define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
index ed90403f0ee71dcfaefc4ae03d84990d59155f3e..765ec16a6e392d3406d1891c6a58bfc456f1d23a 100644 (file)
@@ -286,50 +286,6 @@ xfs_read(
        return ret;
 }
 
-ssize_t
-xfs_sendfile(
-       bhv_desc_t              *bdp,
-       struct file             *filp,
-       loff_t                  *offset,
-       int                     ioflags,
-       size_t                  count,
-       read_actor_t            actor,
-       void                    *target,
-       cred_t                  *credp)
-{
-       xfs_inode_t             *ip = XFS_BHVTOI(bdp);
-       xfs_mount_t             *mp = ip->i_mount;
-       ssize_t                 ret;
-
-       XFS_STATS_INC(xs_read_calls);
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -EIO;
-
-       xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
-       if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
-           (!(ioflags & IO_INVIS))) {
-               bhv_vrwlock_t locktype = VRWLOCK_READ;
-               int error;
-
-               error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
-                                     *offset, count,
-                                     FILP_DELAY_FLAG(filp), &locktype);
-               if (error) {
-                       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-                       return -error;
-               }
-       }
-       xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
-                  (void *)(unsigned long)target, count, *offset, ioflags);
-       ret = generic_file_sendfile(filp, offset, count, actor, target);
-       if (ret > 0)
-               XFS_STATS_ADD(xs_read_bytes, ret);
-
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-       return ret;
-}
-
 ssize_t
 xfs_splice_read(
        bhv_desc_t              *bdp,
index 7ac51b1d2161c6b9d2033dc3d3ea96d3e1ff38d7..7c60a1eed88ba6c347d5ba383147d56e077f459c 100644 (file)
@@ -90,9 +90,6 @@ extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
 extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
                                const struct iovec *, unsigned int,
                                loff_t *, int, struct cred *);
-extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
-                               loff_t *, int, size_t, read_actor_t,
-                               void *, struct cred *);
 extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *,
                                struct pipe_inode_info *, size_t, int, int,
                                struct cred *);
index d1b2d01843d177d2f75b8bed5f0c07d3061ba519..013048a92643d1adb1001b652b07c948add850dc 100644 (file)
@@ -139,9 +139,6 @@ typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
 typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
                                const struct iovec *, unsigned int,
                                loff_t *, int, struct cred *);
-typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
-                               loff_t *, int, size_t, read_actor_t,
-                               void *, struct cred *);
 typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *,
                                struct pipe_inode_info *, size_t, int, int,
                                struct cred *);
@@ -206,7 +203,6 @@ typedef struct bhv_vnodeops {
        vop_close_t             vop_close;
        vop_read_t              vop_read;
        vop_write_t             vop_write;
-       vop_sendfile_t          vop_sendfile;
        vop_splice_read_t       vop_splice_read;
        vop_splice_write_t      vop_splice_write;
        vop_ioctl_t             vop_ioctl;
@@ -254,8 +250,6 @@ typedef struct bhv_vnodeops {
                VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
 #define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr)              \
                VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
-#define bhv_vop_sendfile(vp,f,off,ioflags,cnt,act,targ,cr)             \
-               VOP(vop_sendfile, vp)(VNHEAD(vp),f,off,ioflags,cnt,act,targ,cr)
 #define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr)                        \
                VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr)
 #define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr)               \
index de17aed578f04878d7942cd113103fc19ef5619f..70bc82f65311e3b2a57a7cda2fd292256f8201f6 100644 (file)
@@ -4680,9 +4680,6 @@ bhv_vnodeops_t xfs_vnodeops = {
        .vop_open               = xfs_open,
        .vop_close              = xfs_close,
        .vop_read               = xfs_read,
-#ifdef HAVE_SENDFILE
-       .vop_sendfile           = xfs_sendfile,
-#endif
 #ifdef HAVE_SPLICE
        .vop_splice_read        = xfs_splice_read,
        .vop_splice_write       = xfs_splice_write,
index 6a41f4cab14c9a6efa90c824006497a55a230a19..4f0b3bf5983c81fb495794d09b9b5e7c632b17dd 100644 (file)
@@ -1054,7 +1054,7 @@ struct block_device_operations {
 };
 
 /*
- * "descriptor" for what we're up to with a read for sendfile().
+ * "descriptor" for what we're up to with a read.
  * This allows us to use the same read code yet
  * have multiple different users of the data that
  * we read from a file.
@@ -1105,7 +1105,6 @@ struct file_operations {
        int (*aio_fsync) (struct kiocb *, int datasync);
        int (*fasync) (int, struct file *, int);
        int (*lock) (struct file *, int, struct file_lock *);
-       ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
        ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
        unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
        int (*check_flags)(int);
@@ -1762,7 +1761,6 @@ extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
                unsigned long, loff_t, loff_t *, size_t, ssize_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
-extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
 extern void do_generic_mapping_read(struct address_space *mapping,
                                    struct file_ra_state *, struct file *,
                                    loff_t *, read_descriptor_t *, read_actor_t);
@@ -1792,9 +1790,6 @@ extern int nonseekable_open(struct inode * inode, struct file * filp);
 #ifdef CONFIG_FS_XIP
 extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len,
                             loff_t *ppos);
-extern ssize_t xip_file_sendfile(struct file *in_file, loff_t *ppos,
-                                size_t count, read_actor_t actor,
-                                void *target);
 extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
 extern ssize_t xip_file_write(struct file *filp, const char __user *buf,
                              size_t len, loff_t *ppos);
index c8884f9712285b25cc88ff7518c2e0b79e0bcb39..8e4120285f72ef3379be0de768880b67453437c9 100644 (file)
@@ -9,13 +9,39 @@
 #define PIPE_BUF_FLAG_ATOMIC   0x02    /* was atomically mapped */
 #define PIPE_BUF_FLAG_GIFT     0x04    /* page is a gift */
 
+/**
+ *     struct pipe_buffer - a linux kernel pipe buffer
+ *     @page: the page containing the data for the pipe buffer
+ *     @offset: offset of data inside the @page
+ *     @len: length of data inside the @page
+ *     @ops: operations associated with this buffer. See @pipe_buf_operations.
+ *     @flags: pipe buffer flags. See above.
+ *     @private: private data owned by the ops.
+ **/
 struct pipe_buffer {
        struct page *page;
        unsigned int offset, len;
        const struct pipe_buf_operations *ops;
        unsigned int flags;
+       unsigned long private;
 };
 
+/**
+ *     struct pipe_inode_info - a linux kernel pipe
+ *     @wait: reader/writer wait point in case of empty/full pipe
+ *     @nrbufs: the number of non-empty pipe buffers in this pipe
+ *     @curbuf: the current pipe buffer entry
+ *     @tmp_page: cached released page
+ *     @readers: number of current readers of this pipe
+ *     @writers: number of current writers of this pipe
+ *     @waiting_writers: number of writers blocked waiting for room
+ *     @r_counter: reader counter
+ *     @w_counter: writer counter
+ *     @fasync_readers: reader side fasync
+ *     @fasync_writers: writer side fasync
+ *     @inode: inode this pipe is attached to
+ *     @bufs: the circular array of pipe buffers
+ **/
 struct pipe_inode_info {
        wait_queue_head_t wait;
        unsigned int nrbufs, curbuf;
@@ -34,22 +60,73 @@ struct pipe_inode_info {
 /*
  * Note on the nesting of these functions:
  *
- * ->pin()
+ * ->confirm()
  *     ->steal()
  *     ...
  *     ->map()
  *     ...
  *     ->unmap()
  *
- * That is, ->map() must be called on a pinned buffer, same goes for ->steal().
+ * That is, ->map() must be called on a confirmed buffer,
+ * same goes for ->steal(). See below for the meaning of each
+ * operation. Also see kerneldoc in fs/pipe.c for the pipe
+ * and generic variants of these hooks.
  */
 struct pipe_buf_operations {
+       /*
+        * This is set to 1, if the generic pipe read/write may coalesce
+        * data into an existing buffer. If this is set to 0, a new pipe
+        * page segment is always used for new data.
+        */
        int can_merge;
+
+       /*
+        * ->map() returns a virtual address mapping of the pipe buffer.
+        * The last integer flag reflects whether this should be an atomic
+        * mapping or not. The atomic map is faster, however you can't take
+        * page faults before calling ->unmap() again. So if you need to eg
+        * access user data through copy_to/from_user(), then you must get
+        * a non-atomic map. ->map() uses the KM_USER0 atomic slot for
+        * atomic maps, so you can't map more than one pipe_buffer at once
+        * and you have to be careful if mapping another page as source
+        * or destination for a copy (IOW, it has to use something else
+        * than KM_USER0).
+        */
        void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int);
+
+       /*
+        * Undoes ->map(), finishes the virtual mapping of the pipe buffer.
+        */
        void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *);
-       int (*pin)(struct pipe_inode_info *, struct pipe_buffer *);
+
+       /*
+        * ->confirm() verifies that the data in the pipe buffer is there
+        * and that the contents are good. If the pages in the pipe belong
+        * to a file system, we may need to wait for IO completion in this
+        * hook. Returns 0 for good, or a negative error value in case of
+        * error.
+        */
+       int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *);
+
+       /*
+        * When the contents of this pipe buffer has been completely
+        * consumed by a reader, ->release() is called.
+        */
        void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
+
+       /*
+        * Attempt to take ownership of the pipe buffer and its contents.
+        * ->steal() returns 0 for success, in which case the contents
+        * of the pipe (the buf->page) is locked and now completely owned
+        * by the caller. The page may then be transferred to a different
+        * mapping, the most often used case is insertion into different
+        * file address space cache.
+        */
        int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
+
+       /*
+        * Get a reference to the pipe buffer.
+        */
        void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
 };
 
@@ -68,39 +145,7 @@ void __free_pipe_info(struct pipe_inode_info *);
 void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int);
 void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *);
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
-int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *);
+int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
 
-/*
- * splice is tied to pipes as a transport (at least for now), so we'll just
- * add the splice flags here.
- */
-#define SPLICE_F_MOVE  (0x01)  /* move pages instead of copying */
-#define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */
-                                /* we may still block on the fd we splice */
-                                /* from/to, of course */
-#define SPLICE_F_MORE  (0x04)  /* expect more data */
-#define SPLICE_F_GIFT  (0x08)  /* pages passed in are a gift */
-
-/*
- * Passed to the actors
- */
-struct splice_desc {
-       unsigned int len, total_len;    /* current and remaining length */
-       unsigned int flags;             /* splice flags */
-       struct file *file;              /* file to read/write */
-       loff_t pos;                     /* file position */
-};
-
-typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
-                          struct splice_desc *);
-
-extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
-                               loff_t *, size_t, unsigned int,
-                               splice_actor *);
-
-extern ssize_t __splice_from_pipe(struct pipe_inode_info *, struct file *,
-                                 loff_t *, size_t, unsigned int,
-                                 splice_actor *);
-
 #endif
diff --git a/include/linux/splice.h b/include/linux/splice.h
new file mode 100644 (file)
index 0000000..33e447f
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Function declerations and data structures related to the splice
+ * implementation.
+ *
+ * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
+ *
+ */
+#ifndef SPLICE_H
+#define SPLICE_H
+
+#include <linux/pipe_fs_i.h>
+
+/*
+ * splice is tied to pipes as a transport (at least for now), so we'll just
+ * add the splice flags here.
+ */
+#define SPLICE_F_MOVE  (0x01)  /* move pages instead of copying */
+#define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */
+                                /* we may still block on the fd we splice */
+                                /* from/to, of course */
+#define SPLICE_F_MORE  (0x04)  /* expect more data */
+#define SPLICE_F_GIFT  (0x08)  /* pages passed in are a gift */
+
+/*
+ * Passed to the actors
+ */
+struct splice_desc {
+       unsigned int len, total_len;    /* current and remaining length */
+       unsigned int flags;             /* splice flags */
+       /*
+        * actor() private data
+        */
+       union {
+               void __user *userptr;   /* memory to write to */
+               struct file *file;      /* file to read/write */
+               void *data;             /* cookie */
+       } u;
+       loff_t pos;                     /* file position */
+};
+
+struct partial_page {
+       unsigned int offset;
+       unsigned int len;
+       unsigned long private;
+};
+
+/*
+ * Passed to splice_to_pipe
+ */
+struct splice_pipe_desc {
+       struct page **pages;            /* page map */
+       struct partial_page *partial;   /* pages[] may not be contig */
+       int nr_pages;                   /* number of pages in map */
+       unsigned int flags;             /* splice flags */
+       const struct pipe_buf_operations *ops;/* ops associated with output pipe */
+};
+
+typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
+                          struct splice_desc *);
+typedef int (splice_direct_actor)(struct pipe_inode_info *,
+                                 struct splice_desc *);
+
+extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
+                               loff_t *, size_t, unsigned int,
+                               splice_actor *);
+extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
+                                 struct splice_desc *, splice_actor *);
+extern ssize_t splice_to_pipe(struct pipe_inode_info *,
+                             struct splice_pipe_desc *);
+extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
+                                     splice_direct_actor *);
+
+#endif
index 4a7ae8ab6eb873e75aa2f14c59fc40332dd7da48..129d50f2225c02b8bb94505de60dc579b9a87427 100644 (file)
@@ -253,7 +253,7 @@ struct svc_rqst {
                                                 * determine what device number
                                                 * to report (real or virtual)
                                                 */
-       int                     rq_sendfile_ok; /* turned off in gss privacy
+       int                     rq_splice_ok;   /* turned off in gss privacy
                                                 * to prevent encrypting page
                                                 * cache pages */
        wait_queue_head_t       rq_wait;        /* synchronization */
index 95db8c79fe8f3371249029a19e48e817ffc34d12..3b299fb3855cc5384fb520beb18b6f6a904da511 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <linux/cpu.h>
+#include <linux/splice.h>
 
 /* list of open channels, for cpu hotplug */
 static DEFINE_MUTEX(relay_channels_mutex);
@@ -121,6 +122,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size)
                buf->page_array[i] = alloc_page(GFP_KERNEL);
                if (unlikely(!buf->page_array[i]))
                        goto depopulate;
+               set_page_private(buf->page_array[i], (unsigned long)buf);
        }
        mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL);
        if (!mem)
@@ -970,43 +972,6 @@ static int subbuf_read_actor(size_t read_start,
        return ret;
 }
 
-/*
- *     subbuf_send_actor - send up to one subbuf's worth of data
- */
-static int subbuf_send_actor(size_t read_start,
-                            struct rchan_buf *buf,
-                            size_t avail,
-                            read_descriptor_t *desc,
-                            read_actor_t actor)
-{
-       unsigned long pidx, poff;
-       unsigned int subbuf_pages;
-       int ret = 0;
-
-       subbuf_pages = buf->chan->alloc_size >> PAGE_SHIFT;
-       pidx = (read_start / PAGE_SIZE) % subbuf_pages;
-       poff = read_start & ~PAGE_MASK;
-       while (avail) {
-               struct page *p = buf->page_array[pidx];
-               unsigned int len;
-
-               len = PAGE_SIZE - poff;
-               if (len > avail)
-                       len = avail;
-
-               len = actor(desc, p, poff, len);
-               if (desc->error)
-                       break;
-
-               avail -= len;
-               ret += len;
-               poff = 0;
-               pidx = (pidx + 1) % subbuf_pages;
-       }
-
-       return ret;
-}
-
 typedef int (*subbuf_actor_t) (size_t read_start,
                               struct rchan_buf *buf,
                               size_t avail,
@@ -1067,19 +1032,159 @@ static ssize_t relay_file_read(struct file *filp,
                                       NULL, &desc);
 }
 
-static ssize_t relay_file_sendfile(struct file *filp,
-                                  loff_t *ppos,
-                                  size_t count,
-                                  read_actor_t actor,
-                                  void *target)
+static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed)
 {
-       read_descriptor_t desc;
-       desc.written = 0;
-       desc.count = count;
-       desc.arg.data = target;
-       desc.error = 0;
-       return relay_file_read_subbufs(filp, ppos, subbuf_send_actor,
-                                      actor, &desc);
+       rbuf->bytes_consumed += bytes_consumed;
+
+       if (rbuf->bytes_consumed >= rbuf->chan->subbuf_size) {
+               relay_subbufs_consumed(rbuf->chan, rbuf->cpu, 1);
+               rbuf->bytes_consumed %= rbuf->chan->subbuf_size;
+       }
+}
+
+static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
+                                  struct pipe_buffer *buf)
+{
+       struct rchan_buf *rbuf;
+
+       rbuf = (struct rchan_buf *)page_private(buf->page);
+       relay_consume_bytes(rbuf, buf->private);
+}
+
+static struct pipe_buf_operations relay_pipe_buf_ops = {
+       .can_merge = 0,
+       .map = generic_pipe_buf_map,
+       .unmap = generic_pipe_buf_unmap,
+       .confirm = generic_pipe_buf_confirm,
+       .release = relay_pipe_buf_release,
+       .steal = generic_pipe_buf_steal,
+       .get = generic_pipe_buf_get,
+};
+
+/**
+ *     subbuf_splice_actor - splice up to one subbuf's worth of data
+ */
+static int subbuf_splice_actor(struct file *in,
+                              loff_t *ppos,
+                              struct pipe_inode_info *pipe,
+                              size_t len,
+                              unsigned int flags,
+                              int *nonpad_ret)
+{
+       unsigned int pidx, poff, total_len, subbuf_pages, ret;
+       struct rchan_buf *rbuf = in->private_data;
+       unsigned int subbuf_size = rbuf->chan->subbuf_size;
+       size_t read_start = ((size_t)*ppos) % rbuf->chan->alloc_size;
+       size_t read_subbuf = read_start / subbuf_size;
+       size_t padding = rbuf->padding[read_subbuf];
+       size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding;
+       struct page *pages[PIPE_BUFFERS];
+       struct partial_page partial[PIPE_BUFFERS];
+       struct splice_pipe_desc spd = {
+               .pages = pages,
+               .nr_pages = 0,
+               .partial = partial,
+               .flags = flags,
+               .ops = &relay_pipe_buf_ops,
+       };
+
+       if (rbuf->subbufs_produced == rbuf->subbufs_consumed)
+               return 0;
+
+       /*
+        * Adjust read len, if longer than what is available
+        */
+       if (len > (subbuf_size - read_start % subbuf_size))
+               len = subbuf_size - read_start % subbuf_size;
+
+       subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
+       pidx = (read_start / PAGE_SIZE) % subbuf_pages;
+       poff = read_start & ~PAGE_MASK;
+
+       for (total_len = 0; spd.nr_pages < subbuf_pages; spd.nr_pages++) {
+               unsigned int this_len, this_end, private;
+               unsigned int cur_pos = read_start + total_len;
+
+               if (!len)
+                       break;
+
+               this_len = min_t(unsigned long, len, PAGE_SIZE - poff);
+               private = this_len;
+
+               spd.pages[spd.nr_pages] = rbuf->page_array[pidx];
+               spd.partial[spd.nr_pages].offset = poff;
+
+               this_end = cur_pos + this_len;
+               if (this_end >= nonpad_end) {
+                       this_len = nonpad_end - cur_pos;
+                       private = this_len + padding;
+               }
+               spd.partial[spd.nr_pages].len = this_len;
+               spd.partial[spd.nr_pages].private = private;
+
+               len -= this_len;
+               total_len += this_len;
+               poff = 0;
+               pidx = (pidx + 1) % subbuf_pages;
+
+               if (this_end >= nonpad_end) {
+                       spd.nr_pages++;
+                       break;
+               }
+       }
+
+       if (!spd.nr_pages)
+               return 0;
+
+       ret = *nonpad_ret = splice_to_pipe(pipe, &spd);
+       if (ret < 0 || ret < total_len)
+               return ret;
+
+        if (read_start + ret == nonpad_end)
+                ret += padding;
+
+        return ret;
+}
+
+static ssize_t relay_file_splice_read(struct file *in,
+                                     loff_t *ppos,
+                                     struct pipe_inode_info *pipe,
+                                     size_t len,
+                                     unsigned int flags)
+{
+       ssize_t spliced;
+       int ret;
+       int nonpad_ret = 0;
+
+       ret = 0;
+       spliced = 0;
+
+       while (len) {
+               ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret);
+               if (ret < 0)
+                       break;
+               else if (!ret) {
+                       if (spliced)
+                               break;
+                       if (flags & SPLICE_F_NONBLOCK) {
+                               ret = -EAGAIN;
+                               break;
+                       }
+               }
+
+               *ppos += ret;
+               if (ret > len)
+                       len = 0;
+               else
+                       len -= ret;
+               spliced += nonpad_ret;
+               nonpad_ret = 0;
+       }
+
+       if (spliced)
+               return spliced;
+
+       return ret;
 }
 
 const struct file_operations relay_file_operations = {
@@ -1089,7 +1194,7 @@ const struct file_operations relay_file_operations = {
        .read           = relay_file_read,
        .llseek         = no_llseek,
        .release        = relay_file_release,
-       .sendfile       = relay_file_sendfile,
+       .splice_read    = relay_file_splice_read,
 };
 EXPORT_SYMBOL_GPL(relay_file_operations);
 
index d1d9814f99ddd51f982799ca70fa86762d7b48c0..c6ebd9f912abbfb7298c99fcd7432a3636857fb1 100644 (file)
@@ -1245,26 +1245,6 @@ int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long o
        return written;
 }
 
-ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos,
-                        size_t count, read_actor_t actor, void *target)
-{
-       read_descriptor_t desc;
-
-       if (!count)
-               return 0;
-
-       desc.written = 0;
-       desc.count = count;
-       desc.arg.data = target;
-       desc.error = 0;
-
-       do_generic_file_read(in_file, ppos, &desc, actor);
-       if (desc.written)
-               return desc.written;
-       return desc.error;
-}
-EXPORT_SYMBOL(generic_file_sendfile);
-
 static ssize_t
 do_readahead(struct address_space *mapping, struct file *filp,
             unsigned long index, unsigned long nr)
index fa360e566d88815caafbeaf57533085f334612ab..65ffc321f0c0f223e4c6115e91c53d12d8319399 100644 (file)
@@ -159,28 +159,6 @@ xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 }
 EXPORT_SYMBOL_GPL(xip_file_read);
 
-ssize_t
-xip_file_sendfile(struct file *in_file, loff_t *ppos,
-            size_t count, read_actor_t actor, void *target)
-{
-       read_descriptor_t desc;
-
-       if (!count)
-               return 0;
-
-       desc.written = 0;
-       desc.count = count;
-       desc.arg.data = target;
-       desc.error = 0;
-
-       do_xip_mapping_read(in_file->f_mapping, &in_file->f_ra, in_file,
-                           ppos, &desc, actor);
-       if (desc.written)
-               return desc.written;
-       return desc.error;
-}
-EXPORT_SYMBOL_GPL(xip_file_sendfile);
-
 /*
  * __xip_unmap is invoked from xip_unmap and
  * xip_write
index b6aae2b33393d96d694145326c6591acf6f83b4b..0493e4d0bcaab2d5b0281446171b4de57598b8ee 100644 (file)
@@ -1100,9 +1100,9 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
         * Normally, filepage is NULL on entry, and either found
         * uptodate immediately, or allocated and zeroed, or read
         * in under swappage, which is then assigned to filepage.
-        * But shmem_prepare_write passes in a locked filepage,
-        * which may be found not uptodate by other callers too,
-        * and may need to be copied from the swappage read in.
+        * But shmem_readpage and shmem_prepare_write pass in a locked
+        * filepage, which may be found not uptodate by other callers
+        * too, and may need to be copied from the swappage read in.
         */
 repeat:
        if (!filepage)
@@ -1485,9 +1485,18 @@ static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_symlink_inline_operations;
 
 /*
- * Normally tmpfs makes no use of shmem_prepare_write, but it
- * lets a tmpfs file be used read-write below the loop driver.
+ * Normally tmpfs avoids the use of shmem_readpage and shmem_prepare_write;
+ * but providing them allows a tmpfs file to be used for splice, sendfile, and
+ * below the loop driver, in the generic fashion that many filesystems support.
  */
+static int shmem_readpage(struct file *file, struct page *page)
+{
+       struct inode *inode = page->mapping->host;
+       int error = shmem_getpage(inode, page->index, &page, SGP_CACHE, NULL);
+       unlock_page(page);
+       return error;
+}
+
 static int
 shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
 {
@@ -1711,25 +1720,6 @@ static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count
        return desc.error;
 }
 
-static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
-                        size_t count, read_actor_t actor, void *target)
-{
-       read_descriptor_t desc;
-
-       if (!count)
-               return 0;
-
-       desc.written = 0;
-       desc.count = count;
-       desc.arg.data = target;
-       desc.error = 0;
-
-       do_shmem_file_read(in_file, ppos, &desc, actor);
-       if (desc.written)
-               return desc.written;
-       return desc.error;
-}
-
 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -2386,6 +2376,7 @@ static const struct address_space_operations shmem_aops = {
        .writepage      = shmem_writepage,
        .set_page_dirty = __set_page_dirty_no_writeback,
 #ifdef CONFIG_TMPFS
+       .readpage       = shmem_readpage,
        .prepare_write  = shmem_prepare_write,
        .commit_write   = simple_commit_write,
 #endif
@@ -2399,7 +2390,8 @@ static const struct file_operations shmem_file_operations = {
        .read           = shmem_file_read,
        .write          = shmem_file_write,
        .fsync          = simple_sync_file,
-       .sendfile       = shmem_file_sendfile,
+       .splice_read    = generic_file_splice_read,
+       .splice_write   = generic_file_splice_write,
 #endif
 };
 
index 099a983797da329249942ccbbf279c92e8ac853b..c094583386fd2242cfc633e30235153e06d7cf35 100644 (file)
@@ -853,7 +853,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
        u32 priv_len, maj_stat;
        int pad, saved_len, remaining_len, offset;
 
-       rqstp->rq_sendfile_ok = 0;
+       rqstp->rq_splice_ok = 0;
 
        priv_len = svc_getnl(&buf->head[0]);
        if (rqstp->rq_deferred) {
index e673ef9939043edc4a2d7aac1bf0c745ebd22917..55ea6df069deeb2042b8b1b8dc4c5294d5bfa649 100644 (file)
@@ -814,7 +814,7 @@ svc_process(struct svc_rqst *rqstp)
        rqstp->rq_res.tail[0].iov_base = NULL;
        rqstp->rq_res.tail[0].iov_len = 0;
        /* Will be turned off only in gss privacy case: */
-       rqstp->rq_sendfile_ok = 1;
+       rqstp->rq_splice_ok = 1;
        /* tcp needs a space for the record length... */
        if (rqstp->rq_prot == IPPROTO_TCP)
                svc_putnl(resv, 0);