Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Mar 2017 19:55:57 +0000 (11:55 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Mar 2017 20:02:42 +0000 (12:02 -0800)
Pull overlayfs updates from Miklos Szeredi:
 "Because copy up can take a long time, serialized copy ups could be a
  big performance bottleneck. This update allows concurrent copy up of
  regular files eliminating this potential problem.

  There are also minor fixes"

* 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs:
  ovl: drop CAP_SYS_RESOURCE from saved mounter's credentials
  ovl: properly implement sync_filesystem()
  ovl: concurrent copy up of regular files
  ovl: introduce copy up waitqueue
  ovl: copy up regular file using O_TMPFILE
  ovl: rearrange code in ovl_copy_up_locked()
  ovl: check if upperdir fs supports O_TMPFILE

1  2 
fs/overlayfs/copy_up.c
fs/overlayfs/super.c
fs/overlayfs/util.c

diff --combined fs/overlayfs/copy_up.c
index 2d4985018fdbe58e204bf60a988726ed9633cbe7,48eb8812ac5b2220e9b2420cfc8ba659a9cf3bfd..906ea6c93260179c9c4947abe97c35750162c3e3
  #include <linux/xattr.h>
  #include <linux/security.h>
  #include <linux/uaccess.h>
 -#include <linux/sched.h>
 +#include <linux/sched/signal.h>
 +#include <linux/cred.h>
  #include <linux/namei.h>
  #include <linux/fdtable.h>
  #include <linux/ratelimit.h>
  #include "overlayfs.h"
+ #include "ovl_entry.h"
  
  #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
  
@@@ -233,12 -233,14 +234,14 @@@ int ovl_set_attr(struct dentry *upperde
  
  static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
                              struct dentry *dentry, struct path *lowerpath,
-                             struct kstat *stat, const char *link)
+                             struct kstat *stat, const char *link,
+                             struct kstat *pstat, bool tmpfile)
  {
        struct inode *wdir = workdir->d_inode;
        struct inode *udir = upperdir->d_inode;
        struct dentry *newdentry = NULL;
        struct dentry *upper = NULL;
+       struct dentry *temp = NULL;
        int err;
        const struct cred *old_creds = NULL;
        struct cred *new_creds = NULL;
                .link = link
        };
  
-       newdentry = ovl_lookup_temp(workdir, dentry);
-       err = PTR_ERR(newdentry);
-       if (IS_ERR(newdentry))
-               goto out;
        upper = lookup_one_len(dentry->d_name.name, upperdir,
                               dentry->d_name.len);
        err = PTR_ERR(upper);
        if (IS_ERR(upper))
-               goto out1;
+               goto out;
  
        err = security_inode_copy_up(dentry, &new_creds);
        if (err < 0)
-               goto out2;
+               goto out1;
  
        if (new_creds)
                old_creds = override_creds(new_creds);
  
-       err = ovl_create_real(wdir, newdentry, &cattr, NULL, true);
+       if (tmpfile)
+               temp = ovl_do_tmpfile(upperdir, stat->mode);
+       else
+               temp = ovl_lookup_temp(workdir, dentry);
+       err = PTR_ERR(temp);
+       if (IS_ERR(temp))
+               goto out1;
+       err = 0;
+       if (!tmpfile)
+               err = ovl_create_real(wdir, temp, &cattr, NULL, true);
  
        if (new_creds) {
                revert_creds(old_creds);
  
                ovl_path_upper(dentry, &upperpath);
                BUG_ON(upperpath.dentry != NULL);
-               upperpath.dentry = newdentry;
+               upperpath.dentry = temp;
+               if (tmpfile) {
+                       inode_unlock(udir);
+                       err = ovl_copy_up_data(lowerpath, &upperpath,
+                                              stat->size);
+                       inode_lock_nested(udir, I_MUTEX_PARENT);
+               } else {
+                       err = ovl_copy_up_data(lowerpath, &upperpath,
+                                              stat->size);
+               }
  
-               err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
                if (err)
                        goto out_cleanup;
        }
  
-       err = ovl_copy_xattr(lowerpath->dentry, newdentry);
+       err = ovl_copy_xattr(lowerpath->dentry, temp);
        if (err)
                goto out_cleanup;
  
-       inode_lock(newdentry->d_inode);
-       err = ovl_set_attr(newdentry, stat);
-       inode_unlock(newdentry->d_inode);
+       inode_lock(temp->d_inode);
+       err = ovl_set_attr(temp, stat);
+       inode_unlock(temp->d_inode);
        if (err)
                goto out_cleanup;
  
-       err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
+       if (tmpfile)
+               err = ovl_do_link(temp, udir, upper, true);
+       else
+               err = ovl_do_rename(wdir, temp, udir, upper, 0);
        if (err)
                goto out_cleanup;
  
+       newdentry = dget(tmpfile ? upper : temp);
        ovl_dentry_update(dentry, newdentry);
        ovl_inode_update(d_inode(dentry), d_inode(newdentry));
-       newdentry = NULL;
+       /* Restore timestamps on parent (best effort) */
+       ovl_set_timestamps(upperdir, pstat);
  out2:
-       dput(upper);
+       dput(temp);
  out1:
-       dput(newdentry);
+       dput(upper);
  out:
        return err;
  
  out_cleanup:
-       ovl_cleanup(wdir, newdentry);
+       if (!tmpfile)
+               ovl_cleanup(wdir, temp);
        goto out2;
  }
  
@@@ -338,6 -361,7 +362,7 @@@ static int ovl_copy_up_one(struct dentr
        struct dentry *lowerdentry = lowerpath->dentry;
        struct dentry *upperdir;
        const char *link = NULL;
+       struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
  
        if (WARN_ON(!workdir))
                return -EROFS;
        ovl_path_upper(parent, &parentpath);
        upperdir = parentpath.dentry;
  
 -      err = vfs_getattr(&parentpath, &pstat);
 +      err = vfs_getattr(&parentpath, &pstat,
 +                        STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
        if (err)
                return err;
  
                        return PTR_ERR(link);
        }
  
+       /* Should we copyup with O_TMPFILE or with workdir? */
+       if (S_ISREG(stat->mode) && ofs->tmpfile) {
+               err = ovl_copy_up_start(dentry);
+               /* err < 0: interrupted, err > 0: raced with another copy-up */
+               if (unlikely(err)) {
+                       pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err);
+                       if (err > 0)
+                               err = 0;
+                       goto out_done;
+               }
+               inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT);
+               err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
+                                        stat, link, &pstat, true);
+               inode_unlock(upperdir->d_inode);
+               ovl_copy_up_end(dentry);
+               goto out_done;
+       }
        err = -EIO;
        if (lock_rename(workdir, upperdir) != NULL) {
                pr_err("overlayfs: failed to lock workdir+upperdir\n");
        }
  
        err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
-                                stat, link);
-       if (!err) {
-               /* Restore timestamps on parent (best effort) */
-               ovl_set_timestamps(upperdir, &pstat);
-       }
+                                stat, link, &pstat, false);
  out_unlock:
        unlock_rename(workdir, upperdir);
+ out_done:
        do_delayed_call(&done);
  
        return err;
@@@ -411,8 -450,7 +452,8 @@@ int ovl_copy_up_flags(struct dentry *de
                }
  
                ovl_path_lower(next, &lowerpath);
 -              err = vfs_getattr(&lowerpath, &stat);
 +              err = vfs_getattr(&lowerpath, &stat,
 +                                STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
                /* maybe truncate regular file. this has no effect on dirs */
                if (flags & O_TRUNC)
                        stat.size = 0;
diff --combined fs/overlayfs/super.c
index 9aa37c2f7f7d0a877a06309702af3b81ce10c009,7b218818136779ced357489f4b88debc32575ece..c9e70d39c1ea1cafd730be5caaea6924a29e7c58
@@@ -7,7 -7,6 +7,7 @@@
   * the Free Software Foundation.
   */
  
 +#include <uapi/linux/magic.h>
  #include <linux/fs.h>
  #include <linux/namei.h>
  #include <linux/xattr.h>
@@@ -161,6 -160,25 +161,25 @@@ static void ovl_put_super(struct super_
        kfree(ufs);
  }
  
+ static int ovl_sync_fs(struct super_block *sb, int wait)
+ {
+       struct ovl_fs *ufs = sb->s_fs_info;
+       struct super_block *upper_sb;
+       int ret;
+       if (!ufs->upper_mnt)
+               return 0;
+       upper_sb = ufs->upper_mnt->mnt_sb;
+       if (!upper_sb->s_op->sync_fs)
+               return 0;
+       /* real inodes have already been synced by sync_filesystem(ovl_sb) */
+       down_read(&upper_sb->s_umount);
+       ret = upper_sb->s_op->sync_fs(upper_sb, wait);
+       up_read(&upper_sb->s_umount);
+       return ret;
+ }
  /**
   * ovl_statfs
   * @sb: The overlayfs super block
@@@ -223,6 -241,7 +242,7 @@@ static int ovl_remount(struct super_blo
  
  static const struct super_operations ovl_super_operations = {
        .put_super      = ovl_put_super,
+       .sync_fs        = ovl_sync_fs,
        .statfs         = ovl_statfs,
        .show_options   = ovl_show_options,
        .remount_fs     = ovl_remount,
@@@ -702,6 -721,7 +722,7 @@@ static int ovl_fill_super(struct super_
        unsigned int stacklen = 0;
        unsigned int i;
        bool remote = false;
+       struct cred *cred;
        int err;
  
        err = -ENOMEM;
        if (!ufs)
                goto out;
  
+       init_waitqueue_head(&ufs->copyup_wq);
        ufs->config.redirect_dir = ovl_redirect_dir_def;
        err = ovl_parse_opt((char *) data, &ufs->config);
        if (err)
                 * creation of workdir in previous step.
                 */
                if (ufs->workdir) {
+                       struct dentry *temp;
                        err = ovl_check_d_type_supported(&workpath);
                        if (err < 0)
                                goto out_put_workdir;
                         */
                        if (!err)
                                pr_warn("overlayfs: upper fs needs to support d_type.\n");
+                       /* Check if upper/work fs supports O_TMPFILE */
+                       temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0);
+                       ufs->tmpfile = !IS_ERR(temp);
+                       if (ufs->tmpfile)
+                               dput(temp);
+                       else
+                               pr_warn("overlayfs: upper fs does not support tmpfile.\n");
                }
        }
  
        else
                sb->s_d_op = &ovl_dentry_operations;
  
-       ufs->creator_cred = prepare_creds();
-       if (!ufs->creator_cred)
+       ufs->creator_cred = cred = prepare_creds();
+       if (!cred)
                goto out_put_lower_mnt;
  
+       /* Never override disk quota limits or use reserved space */
+       cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
        err = -ENOMEM;
        oe = ovl_alloc_entry(numlower);
        if (!oe)
diff --combined fs/overlayfs/util.c
index 9dc1c0af586bad0332cd3550dbeebe3d5b8e441d,01157d6e8cfe055e95371a6f443381ddf1832aa4..1953986ee6bc221f555f4c53f8129f9c865f91e5
@@@ -10,8 -10,7 +10,9 @@@
  #include <linux/fs.h>
  #include <linux/mount.h>
  #include <linux/slab.h>
 +#include <linux/cred.h>
  #include <linux/xattr.h>
++#include <linux/sched/signal.h>
  #include "overlayfs.h"
  #include "ovl_entry.h"
  
@@@ -264,3 -263,33 +265,33 @@@ struct file *ovl_path_open(struct path 
  {
        return dentry_open(path, flags | O_NOATIME, current_cred());
  }
+ int ovl_copy_up_start(struct dentry *dentry)
+ {
+       struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+       struct ovl_entry *oe = dentry->d_fsdata;
+       int err;
+       spin_lock(&ofs->copyup_wq.lock);
+       err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying);
+       if (!err) {
+               if (oe->__upperdentry)
+                       err = 1; /* Already copied up */
+               else
+                       oe->copying = true;
+       }
+       spin_unlock(&ofs->copyup_wq.lock);
+       return err;
+ }
+ void ovl_copy_up_end(struct dentry *dentry)
+ {
+       struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+       struct ovl_entry *oe = dentry->d_fsdata;
+       spin_lock(&ofs->copyup_wq.lock);
+       oe->copying = false;
+       wake_up_locked(&ofs->copyup_wq);
+       spin_unlock(&ofs->copyup_wq.lock);
+ }