Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszer...

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 13 Apr 2018 23:55:41 +0000 (16:55 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 13 Apr 2018 23:55:41 +0000 (16:55 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 13 Apr 2018 23:55:41 +0000 (16:55 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 13 Apr 2018 23:55:41 +0000 (16:55 -0700)
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt

index 6ea1e64d14647404ec9c5f7fbcb2d1559d50e803..961b287ef3233ef5bac93ab76f7ed30377d80e34 100644 (file)
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -14,9 +14,13 @@ The result will inevitably fail to look exactly like a normal
  filesystem for various technical reasons.  The expectation is that
  many use cases will be able to ignore these differences.
  
-This approach is 'hybrid' because the objects that appear in the
-filesystem do not all appear to belong to that filesystem.  In many
-cases an object accessed in the union will be indistinguishable
+
+Overlay objects
+---------------
+
+The overlay filesystem approach is 'hybrid', because the objects that
+appear in the filesystem do not always appear to belong to that filesystem.
+In many cases, an object accessed in the union will be indistinguishable
  from accessing the corresponding object from the original filesystem.
  This is most obvious from the 'st_dev' field returned by stat(2).
  
@@ -34,6 +38,19 @@ make the overlay mount more compliant with filesystem scanners and
  overlay objects will be distinguishable from the corresponding
  objects in the original filesystem.
  
+On 64bit systems, even if not all overlay layers are on the same
+underlying filesystem, the same compliant behavior could be achieved
+with the "xino" feature.  The "xino" feature composes a unique object
+identifier from the real object st_ino and an underlying fsid index.
+If all underlying filesystems support NFS file handles and export file
+handles with 32bit inode number encoding (e.g. ext4), overlay filesystem
+will use the high inode number bits for fsid.  Even when the underlying
+filesystem uses 64bit inode numbers, users can still enable the "xino"
+feature with the "-o xino=on" overlay mount option.  That is useful for the
+case of underlying filesystems like xfs and tmpfs, which use 64bit inode
+numbers, but are very unlikely to use the high inode number bits.
+
+
  Upper and Lower
  ---------------
  
@@ -290,10 +307,19 @@ Non-standard behavior
  ---------------------
  
  The copy_up operation essentially creates a new, identical file and
-moves it over to the old name.  The new file may be on a different
-filesystem, so both st_dev and st_ino of the file may change.
+moves it over to the old name.  Any open files referring to this inode
+will access the old data.
+
+The new file may be on a different filesystem, so both st_dev and st_ino
+of the real file may change.  The values of st_dev and st_ino returned by
+stat(2) on an overlay object are often not the same as the real file
+stat(2) values to prevent the values from changing on copy_up.
  
-Any open files referring to this inode will access the old data.
+Unless "xino" feature is enabled, when overlay layers are not all on the
+same underlying filesystem, the value of st_dev may be different for two
+non-directory objects in the same overlay filesystem and the value of
+st_ino for directory objects may be non persistent and could change even
+while the overlay filesystem is still mounted.
  
  Unless "inode index" feature is enabled, if a file with multiple hard
  links is copied up, then this will "break" the link.  Changes will not be
@@ -302,6 +328,7 @@ propagated to other names referring to the same inode.
  Unless "redirect_dir" feature is enabled, rename(2) on a lower or merged
  directory will fail with EXDEV.
  
+
  Changes to underlying filesystems
  ---------------------------------
  
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c

index 329a5d103846145714d443389508696632ce3b24..645158dc33f1fc86bfcca570361002c540068584 100644 (file)
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -435,6 +435,15 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
         if (IS_ERR_OR_NULL(result))
                 return ERR_PTR(-ESTALE);
  
+       /*
+        * If no acceptance criteria was specified by caller, a disconnected
+        * dentry is also acceptable. Callers may use this mode to query if
+        * file handle is stale or to get a reference to an inode without
+        * risking the high overhead caused by directory reconnect.
+        */
+       if (!acceptable)
+               return result;
+
         if (d_is_dir(result)) {
                 /*
                  * This request is for a directory.
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig

index ce6ff5a0a6e4e8b75d4f588981d9c710432bb52b..17032631c5cf62cc200c8bf2f897bb3e4c0c96cb 100644 (file)
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -86,3 +86,20 @@ config OVERLAY_FS_NFS_EXPORT
           case basis with the "nfs_export=on" mount option.
  
           Say N unless you fully understand the consequences.
+
+config OVERLAY_FS_XINO_AUTO
+       bool "Overlayfs: auto enable inode number mapping"
+       default n
+       depends on OVERLAY_FS
+       help
+         If this config option is enabled then overlay filesystems will use
+         unused high bits in underlying filesystem inode numbers to map all
+         inodes to a unified address space.  The mapped 64bit inode numbers
+         might not be compatible with applications that expect 32bit inodes.
+
+         If compatibility with applications that expect 32bit inodes is not an
+         issue, then it is safe and recommended to say Y here.
+
+         For more information, see Documentation/filesystems/overlayfs.txt
+
+         If unsure, say N.
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c

index d855f508fa209a011721c85f27828bf599302270..8bede0742619007c92c63cd1cc5e388a48d04463 100644 (file)
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -232,7 +232,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
         return err;
  }
  
-struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper)
+struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
  {
         struct ovl_fh *fh;
         int fh_type, fh_len, dwords;
@@ -300,7 +300,7 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
          * up and a pure upper inode.
          */
         if (ovl_can_decode_fh(lower->d_sb)) {
-               fh = ovl_encode_fh(lower, false);
+               fh = ovl_encode_real_fh(lower, false);
                 if (IS_ERR(fh))
                         return PTR_ERR(fh);
         }
@@ -321,7 +321,7 @@ static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
         const struct ovl_fh *fh;
         int err;
  
-       fh = ovl_encode_fh(upper, true);
+       fh = ovl_encode_real_fh(upper, true);
         if (IS_ERR(fh))
                 return PTR_ERR(fh);
  
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c

index 87bd4148f4fb5811547fa2b44a7965af7f22e52a..425a94672300c78fc69393851f35795305c81d48 100644 (file)
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -228,8 +228,8 @@ static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
                 goto fail;
  
         /* Encode an upper or lower file handle */
-       fh = ovl_encode_fh(enc_lower ? ovl_dentry_lower(dentry) :
-                                      ovl_dentry_upper(dentry), !enc_lower);
+       fh = ovl_encode_real_fh(enc_lower ? ovl_dentry_lower(dentry) :
+                               ovl_dentry_upper(dentry), !enc_lower);
         err = PTR_ERR(fh);
         if (IS_ERR(fh))
                 goto fail;
@@ -267,8 +267,8 @@ static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len)
         return OVL_FILEID;
  }
  
-static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len,
-                              struct inode *parent)
+static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
+                        struct inode *parent)
  {
         struct dentry *dentry;
         int type;
@@ -305,15 +305,12 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
         if (d_is_dir(upper ?: lower))
                 return ERR_PTR(-EIO);
  
-       inode = ovl_get_inode(sb, dget(upper), lower, index, !!lower);
+       inode = ovl_get_inode(sb, dget(upper), lowerpath, index, !!lower);
         if (IS_ERR(inode)) {
                 dput(upper);
                 return ERR_CAST(inode);
         }
  
-       if (index)
-               ovl_set_flag(OVL_INDEX, inode);
-
         dentry = d_find_any_alias(inode);
         if (!dentry) {
                 dentry = d_alloc_anon(inode->i_sb);
@@ -685,7 +682,7 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
         if (!ofs->upper_mnt)
                 return ERR_PTR(-EACCES);
  
-       upper = ovl_decode_fh(fh, ofs->upper_mnt);
+       upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
         if (IS_ERR_OR_NULL(upper))
                 return upper;
  
@@ -703,25 +700,39 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
         struct ovl_path *stack = &origin;
         struct dentry *dentry = NULL;
         struct dentry *index = NULL;
-       struct inode *inode = NULL;
-       bool is_deleted = false;
+       struct inode *inode;
         int err;
  
-       /* First lookup indexed upper by fh */
+       /* First lookup overlay inode in inode cache by origin fh */
+       err = ovl_check_origin_fh(ofs, fh, false, NULL, &stack);
+       if (err)
+               return ERR_PTR(err);
+
+       if (!d_is_dir(origin.dentry) ||
+           !(origin.dentry->d_flags & DCACHE_DISCONNECTED)) {
+               inode = ovl_lookup_inode(sb, origin.dentry, false);
+               err = PTR_ERR(inode);
+               if (IS_ERR(inode))
+                       goto out_err;
+               if (inode) {
+                       dentry = d_find_any_alias(inode);
+                       iput(inode);
+                       if (dentry)
+                               goto out;
+               }
+       }
+
+       /* Then lookup indexed upper/whiteout by origin fh */
         if (ofs->indexdir) {
                 index = ovl_get_index_fh(ofs, fh);
                 err = PTR_ERR(index);
                 if (IS_ERR(index)) {
-                       if (err != -ESTALE)
-                               return ERR_PTR(err);
-
-                       /* Found a whiteout index - treat as deleted inode */
-                       is_deleted = true;
                         index = NULL;
+                       goto out_err;
                 }
         }
  
-       /* Then try to get upper dir by index */
+       /* Then try to get a connected upper dir by index */
         if (index && d_is_dir(index)) {
                 struct dentry *upper = ovl_index_upper(ofs, index);
  
@@ -734,24 +745,19 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
                 goto out;
         }
  
-       /* Then lookup origin by fh */
-       err = ovl_check_origin_fh(ofs, fh, NULL, &stack);
-       if (err) {
-               goto out_err;
-       } else if (index) {
-               err = ovl_verify_origin(index, origin.dentry, false);
+       /* Otherwise, get a connected non-upper dir or disconnected non-dir */
+       if (d_is_dir(origin.dentry) &&
+           (origin.dentry->d_flags & DCACHE_DISCONNECTED)) {
+               dput(origin.dentry);
+               origin.dentry = NULL;
+               err = ovl_check_origin_fh(ofs, fh, true, NULL, &stack);
                 if (err)
                         goto out_err;
-       } else if (is_deleted) {
-               /* Lookup deleted non-dir by origin inode */
-               if (!d_is_dir(origin.dentry))
-                       inode = ovl_lookup_inode(sb, origin.dentry, false);
-               err = -ESTALE;
-               if (!inode || atomic_read(&inode->i_count) == 1)
+       }
+       if (index) {
+               err = ovl_verify_origin(index, origin.dentry, false);
+               if (err)
                         goto out_err;
-
-               /* Deleted but still open? */
-               index = dget(ovl_i_dentry_upper(inode));
         }
  
         dentry = ovl_get_dentry(sb, NULL, &origin, index);
@@ -759,7 +765,6 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
  out:
         dput(origin.dentry);
         dput(index);
-       iput(inode);
         return dentry;
  
  out_err:
@@ -829,7 +834,7 @@ static struct dentry *ovl_get_parent(struct dentry *dentry)
  }
  
  const struct export_operations ovl_export_operations = {
-       .encode_fh      = ovl_encode_inode_fh,
+       .encode_fh      = ovl_encode_fh,
         .fh_to_dentry   = ovl_fh_to_dentry,
         .fh_to_parent   = ovl_fh_to_parent,
         .get_name       = ovl_get_name,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c

index 3b1bd469accdfe767afc276def9f319f57ebde68..6e3815fb006b8237a313e2b26069f6232c3d4609 100644 (file)
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -16,13 +16,6 @@
  #include "overlayfs.h"
  
  
-static dev_t ovl_get_pseudo_dev(struct dentry *dentry)
-{
-       struct ovl_entry *oe = dentry->d_fsdata;
-
-       return oe->lowerstack[0].layer->pseudo_dev;
-}
-
  int ovl_setattr(struct dentry *dentry, struct iattr *attr)
  {
         int err;
@@ -66,6 +59,69 @@ out:
         return err;
  }
  
+static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
+                          struct ovl_layer *lower_layer)
+{
+       bool samefs = ovl_same_sb(dentry->d_sb);
+       unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
+
+       if (samefs) {
+               /*
+                * When all layers are on the same fs, all real inode
+                * numbers are unique, so we use the overlay st_dev,
+                * which is friendly to du -x.
+                */
+               stat->dev = dentry->d_sb->s_dev;
+               return 0;
+       } else if (xinobits) {
+               unsigned int shift = 64 - xinobits;
+               /*
+                * All inode numbers of underlying fs should not be using the
+                * high xinobits, so we use high xinobits to partition the
+                * overlay st_ino address space. The high bits hold the fsid
+                * (upper fsid is 0). This way overlay inode numbers are unique
+                * and all inodes use overlay st_dev. Inode numbers are also
+                * persistent for a given layer configuration.
+                */
+               if (stat->ino >> shift) {
+                       pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
+                                           dentry, stat->ino, xinobits);
+               } else {
+                       if (lower_layer)
+                               stat->ino |= ((u64)lower_layer->fsid) << shift;
+
+                       stat->dev = dentry->d_sb->s_dev;
+                       return 0;
+               }
+       }
+
+       /* The inode could not be mapped to a unified st_ino address space */
+       if (S_ISDIR(dentry->d_inode->i_mode)) {
+               /*
+                * Always use the overlay st_dev for directories, so 'find
+                * -xdev' will scan the entire overlay mount and won't cross the
+                * overlay mount boundaries.
+                *
+                * If not all layers are on the same fs the pair {real st_ino;
+                * overlay st_dev} is not unique, so use the non persistent
+                * overlay st_ino for directories.
+                */
+               stat->dev = dentry->d_sb->s_dev;
+               stat->ino = dentry->d_inode->i_ino;
+       } else if (lower_layer && lower_layer->fsid) {
+               /*
+                * For non-samefs setup, if we cannot map all layers st_ino
+                * to a unified address space, we need to make sure that st_dev
+                * is unique per lower fs. Upper layer uses real st_dev and
+                * lower layers use the unique anonymous bdev assigned to the
+                * lower fs.
+                */
+               stat->dev = lower_layer->fs->pseudo_dev;
+       }
+
+       return 0;
+}
+
  int ovl_getattr(const struct path *path, struct kstat *stat,
                 u32 request_mask, unsigned int flags)
  {
@@ -75,6 +131,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
         const struct cred *old_cred;
         bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
         bool samefs = ovl_same_sb(dentry->d_sb);
+       struct ovl_layer *lower_layer = NULL;
         int err;
  
         type = ovl_path_real(dentry, &realpath);
@@ -84,14 +141,18 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
                 goto out;
  
         /*
-        * For non-dir or same fs, we use st_ino of the copy up origin, if we
-        * know it. This guaranties constant st_dev/st_ino across copy up.
+        * For non-dir or same fs, we use st_ino of the copy up origin.
+        * This guarantees constant st_dev/st_ino across copy up.
+        * With xino feature and non-samefs, we use st_ino of the copy up
+        * origin masked with high bits that represent the layer id.
          *
-        * If filesystem supports NFS export ops, this also guaranties
+        * If lower filesystem supports NFS file handles, this also guarantees
          * persistent st_ino across mount cycle.
          */
-       if (!is_dir || samefs) {
-               if (OVL_TYPE_ORIGIN(type)) {
+       if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) {
+               if (!OVL_TYPE_UPPER(type)) {
+                       lower_layer = ovl_layer_lower(dentry);
+               } else if (OVL_TYPE_ORIGIN(type)) {
                         struct kstat lowerstat;
                         u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
  
@@ -118,43 +179,17 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
                          */
                         if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
                             (!ovl_verify_lower(dentry->d_sb) &&
-                            (is_dir || lowerstat.nlink == 1)))
+                            (is_dir || lowerstat.nlink == 1))) {
                                 stat->ino = lowerstat.ino;
-
-                       if (samefs)
-                               WARN_ON_ONCE(stat->dev != lowerstat.dev);
-                       else
-                               stat->dev = ovl_get_pseudo_dev(dentry);
-               }
-               if (samefs) {
-                       /*
-                        * When all layers are on the same fs, all real inode
-                        * number are unique, so we use the overlay st_dev,
-                        * which is friendly to du -x.
-                        */
-                       stat->dev = dentry->d_sb->s_dev;
-               } else if (!OVL_TYPE_UPPER(type)) {
-                       /*
-                        * For non-samefs setup, to make sure that st_dev/st_ino
-                        * pair is unique across the system, we use a unique
-                        * anonymous st_dev for lower layer inode.
-                        */
-                       stat->dev = ovl_get_pseudo_dev(dentry);
+                               lower_layer = ovl_layer_lower(dentry);
+                       }
                 }
-       } else {
-               /*
-                * Always use the overlay st_dev for directories, so 'find
-                * -xdev' will scan the entire overlay mount and won't cross the
-                * overlay mount boundaries.
-                *
-                * If not all layers are on the same fs the pair {real st_ino;
-                * overlay st_dev} is not unique, so use the non persistent
-                * overlay st_ino for directories.
-                */
-               stat->dev = dentry->d_sb->s_dev;
-               stat->ino = dentry->d_inode->i_ino;
         }
  
+       err = ovl_map_dev_ino(dentry, stat, lower_layer);
+       if (err)
+               goto out;
+
         /*
          * It's probably not worth it to count subdirs to get the
          * correct link count.  nlink=1 seems to pacify 'find' and
@@ -383,24 +418,18 @@ int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
  
  int ovl_update_time(struct inode *inode, struct timespec *ts, int flags)
  {
-       struct dentry *alias;
-       struct path upperpath;
-
-       if (!(flags & S_ATIME))
-               return 0;
-
-       alias = d_find_any_alias(inode);
-       if (!alias)
-               return 0;
-
-       ovl_path_upper(alias, &upperpath);
-       if (upperpath.dentry) {
-               touch_atime(&upperpath);
-               inode->i_atime = d_inode(upperpath.dentry)->i_atime;
+       if (flags & S_ATIME) {
+               struct ovl_fs *ofs = inode->i_sb->s_fs_info;
+               struct path upperpath = {
+                       .mnt = ofs->upper_mnt,
+                       .dentry = ovl_upperdentry_dereference(OVL_I(inode)),
+               };
+
+               if (upperpath.dentry) {
+                       touch_atime(&upperpath);
+                       inode->i_atime = d_inode(upperpath.dentry)->i_atime;
+               }
         }
-
-       dput(alias);
-
         return 0;
  }
  
@@ -459,9 +488,27 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
  #endif
  }
  
-static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
+static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
+                          unsigned long ino, int fsid)
  {
-       inode->i_ino = get_next_ino();
+       int xinobits = ovl_xino_bits(inode->i_sb);
+
+       /*
+        * When NFS export is enabled and d_ino is consistent with st_ino
+        * (samefs or i_ino has enough bits to encode layer), set the same
+        * value used for d_ino to i_ino, because nfsd readdirplus compares
+        * d_ino values to i_ino values of child entries. When called from
+        * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
+        * upper inode i_ino on ovl_inode_init() or ovl_inode_update().
+        */
+       if (inode->i_sb->s_export_op &&
+           (ovl_same_sb(inode->i_sb) || xinobits)) {
+               inode->i_ino = ino;
+               if (xinobits && fsid && !(ino >> (64 - xinobits)))
+                       inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
+       } else {
+               inode->i_ino = get_next_ino();
+       }
         inode->i_mode = mode;
         inode->i_flags |= S_NOCMTIME;
  #ifdef CONFIG_FS_POSIX_ACL
@@ -597,7 +644,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
  
         inode = new_inode(sb);
         if (inode)
-               ovl_fill_inode(inode, mode, rdev);
+               ovl_fill_inode(inode, mode, rdev, 0, 0);
  
         return inode;
  }
@@ -703,13 +750,16 @@ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
  }
  
  struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
-                           struct dentry *lowerdentry, struct dentry *index,
+                           struct ovl_path *lowerpath, struct dentry *index,
                             unsigned int numlower)
  {
         struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
         struct inode *inode;
+       struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL;
         bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index);
+       int fsid = bylower ? lowerpath->layer->fsid : 0;
         bool is_dir;
+       unsigned long ino = 0;
  
         if (!realinode)
                 realinode = d_inode(lowerdentry);
@@ -748,18 +798,22 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
                 if (!is_dir)
                         nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
                 set_nlink(inode, nlink);
+               ino = key->i_ino;
         } else {
                 /* Lower hardlink that will be broken on copy up */
                 inode = new_inode(sb);
                 if (!inode)
                         goto out_nomem;
         }
-       ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
+       ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
         ovl_inode_init(inode, upperdentry, lowerdentry);
  
         if (upperdentry && ovl_is_impuredir(upperdentry))
                 ovl_set_flag(OVL_IMPURE, inode);
  
+       if (index)
+               ovl_set_flag(OVL_INDEX, inode);
+
         /* Check for non-merge dir that may have whiteouts */
         if (is_dir) {
                 if (((upperdentry && lowerdentry) || numlower > 1) ||
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c

index 70fcfcc684cc0a07566aeacb7fe564f2156bc0d2..2dba29eadde6b11d1d9211e921691b7d144d8a41 100644 (file)
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -56,6 +56,15 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
                         if (s == next)
                                 goto invalid;
                 }
+               /*
+                * One of the ancestor path elements in an absolute path
+                * lookup in ovl_lookup_layer() could have been opaque and
+                * that will stop further lookup in lower layers (d->stop=true).
+                * But we have found an absolute redirect in descendant path
+                * element and that should force continue lookup in lower
+                * layers (reset d->stop).
+                */
+               d->stop = false;
         } else {
                 if (strchr(buf, '/') != NULL)
                         goto invalid;
@@ -171,7 +180,8 @@ invalid:
         goto out;
  }
  
-struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt)
+struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
+                                 bool connected)
  {
         struct dentry *real;
         int bytes;
@@ -186,7 +196,7 @@ struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt)
         bytes = (fh->len - offsetof(struct ovl_fh, fid));
         real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
                                   bytes >> 2, (int)fh->type,
-                                 ovl_acceptable, mnt);
+                                 connected ? ovl_acceptable : NULL, mnt);
         if (IS_ERR(real)) {
                 /*
                  * Treat stale file handle to lower file as "origin unknown".
@@ -220,6 +230,7 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
  {
         struct dentry *this;
         int err;
+       bool last_element = !post[0];
  
         this = lookup_one_len_unlocked(name, base, namelen);
         if (IS_ERR(this)) {
@@ -245,11 +256,23 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
                 d->stop = true;
                 if (d->is_dir)
                         goto put_and_out;
+
+               /*
+                * NB: handle failure to lookup non-last element when non-dir
+                * redirects become possible
+                */
+               WARN_ON(!last_element);
                 goto out;
         }
-       d->is_dir = true;
-       if (!d->last && ovl_is_opaquedir(this)) {
-               d->stop = d->opaque = true;
+       if (last_element)
+               d->is_dir = true;
+       if (d->last)
+               goto out;
+
+       if (ovl_is_opaquedir(this)) {
+               d->stop = true;
+               if (last_element)
+                       d->opaque = true;
                 goto out;
         }
         err = ovl_check_redirect(this, d, prelen, post);
@@ -310,14 +333,15 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
  }
  
  
-int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
                         struct dentry *upperdentry, struct ovl_path **stackp)
  {
         struct dentry *origin = NULL;
         int i;
  
         for (i = 0; i < ofs->numlower; i++) {
-               origin = ovl_decode_fh(fh, ofs->lower_layers[i].mnt);
+               origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
+                                           connected);
                 if (origin)
                         break;
         }
@@ -361,7 +385,7 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
         if (IS_ERR_OR_NULL(fh))
                 return PTR_ERR(fh);
  
-       err = ovl_check_origin_fh(ofs, fh, upperdentry, stackp);
+       err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
         kfree(fh);
  
         if (err) {
@@ -415,7 +439,7 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name,
         struct ovl_fh *fh;
         int err;
  
-       fh = ovl_encode_fh(real, is_upper);
+       fh = ovl_encode_real_fh(real, is_upper);
         err = PTR_ERR(fh);
         if (IS_ERR(fh))
                 goto fail;
@@ -451,7 +475,7 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
         if (IS_ERR_OR_NULL(fh))
                 return ERR_CAST(fh);
  
-       upper = ovl_decode_fh(fh, ofs->upper_mnt);
+       upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
         kfree(fh);
  
         if (IS_ERR_OR_NULL(upper))
@@ -558,7 +582,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
  
         /* Check if non-dir index is orphan and don't warn before cleaning it */
         if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
-               err = ovl_check_origin_fh(ofs, fh, index, &stack);
+               err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
                 if (err)
                         goto fail;
  
@@ -619,7 +643,7 @@ int ovl_get_index_name(struct dentry *origin, struct qstr *name)
         struct ovl_fh *fh;
         int err;
  
-       fh = ovl_encode_fh(origin, false);
+       fh = ovl_encode_real_fh(origin, false);
         if (IS_ERR(fh))
                 return PTR_ERR(fh);
  
@@ -815,7 +839,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
                 .is_dir = false,
                 .opaque = false,
                 .stop = false,
-               .last = !poe->numlower,
+               .last = ofs->config.redirect_follow ? false : !poe->numlower,
                 .redirect = NULL,
         };
  
@@ -873,7 +897,11 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
         for (i = 0; !d.stop && i < poe->numlower; i++) {
                 struct ovl_path lower = poe->lowerstack[i];
  
-               d.last = i == poe->numlower - 1;
+               if (!ofs->config.redirect_follow)
+                       d.last = i == poe->numlower - 1;
+               else
+                       d.last = lower.layer->idx == roe->numlower;
+
                 err = ovl_lookup_layer(lower.dentry, &d, &this);
                 if (err)
                         goto out_put;
@@ -976,17 +1004,18 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
                 upperdentry = dget(index);
  
         if (upperdentry || ctr) {
-               if (ctr)
-                       origin = stack[0].dentry;
-               inode = ovl_get_inode(dentry->d_sb, upperdentry, origin, index,
+               inode = ovl_get_inode(dentry->d_sb, upperdentry, stack, index,
                                       ctr);
                 err = PTR_ERR(inode);
                 if (IS_ERR(inode))
                         goto out_free_oe;
  
+               /*
+                * NB: handle redirected hard links when non-dir redirects
+                * become possible
+                */
+               WARN_ON(OVL_I(inode)->redirect);
                 OVL_I(inode)->redirect = upperredirect;
-               if (index)
-                       ovl_set_flag(OVL_INDEX, inode);
         }
  
         revert_creds(old_cred);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h

index 225ff11711474fe80d9d4151f6267aaab587f034..e0b7de799f6b887a22cfe6a99faeec1430e50208 100644 (file)
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -202,7 +202,7 @@ void ovl_drop_write(struct dentry *dentry);
  struct dentry *ovl_workdir(struct dentry *dentry);
  const struct cred *ovl_override_creds(struct super_block *sb);
  struct super_block *ovl_same_sb(struct super_block *sb);
-bool ovl_can_decode_fh(struct super_block *sb);
+int ovl_can_decode_fh(struct super_block *sb);
  struct dentry *ovl_indexdir(struct super_block *sb);
  bool ovl_index_all(struct super_block *sb);
  bool ovl_verify_lower(struct super_block *sb);
@@ -215,6 +215,7 @@ void ovl_path_lower(struct dentry *dentry, struct path *path);
  enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
  struct dentry *ovl_dentry_upper(struct dentry *dentry);
  struct dentry *ovl_dentry_lower(struct dentry *dentry);
+struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
  struct dentry *ovl_dentry_real(struct dentry *dentry);
  struct dentry *ovl_i_dentry_upper(struct inode *inode);
  struct inode *ovl_inode_upper(struct inode *inode);
@@ -263,11 +264,19 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
         return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
  }
  
+static inline unsigned int ovl_xino_bits(struct super_block *sb)
+{
+       struct ovl_fs *ofs = sb->s_fs_info;
+
+       return ofs->xino_bits;
+}
+
  
  /* namei.c */
  int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
-struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt);
-int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
+                                 bool connected);
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
                         struct dentry *upperdentry, struct ovl_path **stackp);
  int ovl_verify_set_fh(struct dentry *dentry, const char *name,
                       struct dentry *real, bool is_upper, bool set);
@@ -329,7 +338,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
  struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
                                bool is_upper);
  struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
-                           struct dentry *lowerdentry, struct dentry *index,
+                           struct ovl_path *lowerpath, struct dentry *index,
                             unsigned int numlower);
  static inline void ovl_copyattr(struct inode *from, struct inode *to)
  {
@@ -361,7 +370,7 @@ int ovl_copy_up(struct dentry *dentry);
  int ovl_copy_up_flags(struct dentry *dentry, int flags);
  int ovl_copy_xattr(struct dentry *old, struct dentry *new);
  int ovl_set_attr(struct dentry *upper, struct kstat *stat);
-struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper);
+struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
  int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
                    struct dentry *upper);
  
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h

index bfef6edcc1112853e750a3a5468334aa39991465..41655a7d68947068e2e13b57739aa1bd7a1279eb 100644 (file)
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -18,13 +18,21 @@ struct ovl_config {
         const char *redirect_mode;
         bool index;
         bool nfs_export;
+       int xino;
+};
+
+struct ovl_sb {
+       struct super_block *sb;
+       dev_t pseudo_dev;
  };
  
  struct ovl_layer {
         struct vfsmount *mnt;
-       dev_t pseudo_dev;
-       /* Index of this layer in fs root (upper == 0) */
+       struct ovl_sb *fs;
+       /* Index of this layer in fs root (upper idx == 0) */
         int idx;
+       /* One fsid per unique underlying sb (upper fsid == 0) */
+       int fsid;
  };
  
  struct ovl_path {
@@ -35,8 +43,11 @@ struct ovl_path {
  /* private information held for overlayfs's superblock */
  struct ovl_fs {
         struct vfsmount *upper_mnt;
-       unsigned numlower;
+       unsigned int numlower;
+       /* Number of unique lower sb that differ from upper sb */
+       unsigned int numlowerfs;
         struct ovl_layer *lower_layers;
+       struct ovl_sb *lower_fs;
         /* workbasedir is the path at workdir= mount option */
         struct dentry *workbasedir;
         /* workdir is the 'work' directory under workbasedir */
@@ -50,11 +61,11 @@ struct ovl_fs {
         const struct cred *creator_cred;
         bool tmpfile;
         bool noxattr;
-       /* sb common to all layers */
-       struct super_block *same_sb;
         /* Did we take the inuse lock? */
         bool upperdir_locked;
         bool workdir_locked;
+       /* Inode numbers in all layers do not use the high xino_bits */
+       unsigned int xino_bits;
  };
  
  /* private information held for every overlayfs dentry */
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c

index c11f5c0906c39087978036892f2e11fa88af0299..ef1fe42ff7bb3a4e38e1d75714a1ef1bad4acf42 100644 (file)
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -120,6 +120,10 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
         if (!rdd->dentry)
                 return false;
  
+       /* Always recalc d_ino when remapping lower inode numbers */
+       if (ovl_xino_bits(rdd->dentry->d_sb))
+               return true;
+
         /* Always recalc d_ino for parent */
         if (strcmp(p->name, "..") == 0)
                 return true;
@@ -435,6 +439,19 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
         return cache;
  }
  
+/* Map inode number to lower fs unique range */
+static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
+                              const char *name, int namelen)
+{
+       if (ino >> (64 - xinobits)) {
+               pr_warn_ratelimited("overlayfs: d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
+                                   namelen, name, ino, xinobits);
+               return ino;
+       }
+
+       return ino | ((u64)fsid) << (64 - xinobits);
+}
+
  /*
   * Set d_ino for upper entries. Non-upper entries should always report
   * the uppermost real inode ino and should not call this function.
@@ -452,9 +469,10 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
         struct dentry *this = NULL;
         enum ovl_path_type type;
         u64 ino = p->real_ino;
+       int xinobits = ovl_xino_bits(dir->d_sb);
         int err = 0;
  
-       if (!ovl_same_sb(dir->d_sb))
+       if (!ovl_same_sb(dir->d_sb) && !xinobits)
                 goto out;
  
         if (p->name[0] == '.') {
@@ -491,6 +509,10 @@ get:
  
                 WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
                 ino = stat.ino;
+       } else if (xinobits && !OVL_TYPE_UPPER(type)) {
+               ino = ovl_remap_lower_ino(ino, xinobits,
+                                         ovl_layer_lower(this)->fsid,
+                                         p->name, p->len);
         }
  
  out:
@@ -618,6 +640,8 @@ struct ovl_readdir_translate {
         struct ovl_dir_cache *cache;
         struct dir_context ctx;
         u64 parent_ino;
+       int fsid;
+       int xinobits;
  };
  
  static int ovl_fill_real(struct dir_context *ctx, const char *name,
@@ -628,14 +652,17 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name,
                 container_of(ctx, struct ovl_readdir_translate, ctx);
         struct dir_context *orig_ctx = rdt->orig_ctx;
  
-       if (rdt->parent_ino && strcmp(name, "..") == 0)
+       if (rdt->parent_ino && strcmp(name, "..") == 0) {
                 ino = rdt->parent_ino;
-       else if (rdt->cache) {
+       } else if (rdt->cache) {
                 struct ovl_cache_entry *p;
  
                 p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
                 if (p)
                         ino = p->ino;
+       } else if (rdt->xinobits) {
+               ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
+                                         name, namelen);
         }
  
         return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
@@ -646,11 +673,16 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
         int err;
         struct ovl_dir_file *od = file->private_data;
         struct dentry *dir = file->f_path.dentry;
+       struct ovl_layer *lower_layer = ovl_layer_lower(dir);
         struct ovl_readdir_translate rdt = {
                 .ctx.actor = ovl_fill_real,
                 .orig_ctx = ctx,
+               .xinobits = ovl_xino_bits(dir->d_sb),
         };
  
+       if (rdt.xinobits && lower_layer)
+               rdt.fsid = lower_layer->fsid;
+
         if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
                 struct kstat stat;
                 struct path statpath = file->f_path;
@@ -693,9 +725,10 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
                  * dir is impure then need to adjust d_ino for copied up
                  * entries.
                  */
-               if (ovl_same_sb(dentry->d_sb) &&
-                   (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) ||
-                    OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))) {
+               if (ovl_xino_bits(dentry->d_sb) ||
+                   (ovl_same_sb(dentry->d_sb) &&
+                    (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) ||
+                     OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
                         return ovl_iterate_real(file, ctx);
                 }
                 return iterate_dir(od->realfile, ctx);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c

index 7c24619ae7fc5229a5d3af6c9298b51064498325..e8551c97de51c0676416d982aad60b9ca72c0373 100644 (file)
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -17,6 +17,7 @@
  #include <linux/statfs.h>
  #include <linux/seq_file.h>
  #include <linux/posix_acl_xattr.h>
+#include <linux/exportfs.h>
  #include "overlayfs.h"
  
  MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
@@ -50,6 +51,11 @@ module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
  MODULE_PARM_DESC(ovl_nfs_export_def,
                  "Default to on or off for the NFS export feature");
  
+static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
+module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
+MODULE_PARM_DESC(ovl_xino_auto_def,
+                "Auto enable xino feature");
+
  static void ovl_entry_stack_free(struct ovl_entry *oe)
  {
         unsigned int i;
@@ -236,11 +242,12 @@ static void ovl_free_fs(struct ovl_fs *ofs)
         if (ofs->upperdir_locked)
                 ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
         mntput(ofs->upper_mnt);
-       for (i = 0; i < ofs->numlower; i++) {
+       for (i = 0; i < ofs->numlower; i++)
                 mntput(ofs->lower_layers[i].mnt);
-               free_anon_bdev(ofs->lower_layers[i].pseudo_dev);
-       }
+       for (i = 0; i < ofs->numlowerfs; i++)
+               free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
         kfree(ofs->lower_layers);
+       kfree(ofs->lower_fs);
  
         kfree(ofs->config.lowerdir);
         kfree(ofs->config.upperdir);
@@ -325,6 +332,23 @@ static const char *ovl_redirect_mode_def(void)
         return ovl_redirect_dir_def ? "on" : "off";
  }
  
+enum {
+       OVL_XINO_OFF,
+       OVL_XINO_AUTO,
+       OVL_XINO_ON,
+};
+
+static const char * const ovl_xino_str[] = {
+       "off",
+       "auto",
+       "on",
+};
+
+static inline int ovl_xino_def(void)
+{
+       return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
+}
+
  /**
   * ovl_show_options
   *
@@ -350,6 +374,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
         if (ofs->config.nfs_export != ovl_nfs_export_def)
                 seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
                                                 "on" : "off");
+       if (ofs->config.xino != ovl_xino_def())
+               seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
         return 0;
  }
  
@@ -384,6 +410,9 @@ enum {
         OPT_INDEX_OFF,
         OPT_NFS_EXPORT_ON,
         OPT_NFS_EXPORT_OFF,
+       OPT_XINO_ON,
+       OPT_XINO_OFF,
+       OPT_XINO_AUTO,
         OPT_ERR,
  };
  
@@ -397,6 +426,9 @@ static const match_table_t ovl_tokens = {
         {OPT_INDEX_OFF,                 "index=off"},
         {OPT_NFS_EXPORT_ON,             "nfs_export=on"},
         {OPT_NFS_EXPORT_OFF,            "nfs_export=off"},
+       {OPT_XINO_ON,                   "xino=on"},
+       {OPT_XINO_OFF,                  "xino=off"},
+       {OPT_XINO_AUTO,                 "xino=auto"},
         {OPT_ERR,                       NULL}
  };
  
@@ -511,6 +543,18 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
                         config->nfs_export = false;
                         break;
  
+               case OPT_XINO_ON:
+                       config->xino = OVL_XINO_ON;
+                       break;
+
+               case OPT_XINO_OFF:
+                       config->xino = OVL_XINO_OFF;
+                       break;
+
+               case OPT_XINO_AUTO:
+                       config->xino = OVL_XINO_AUTO;
+                       break;
+
                 default:
                         pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
                         return -EINVAL;
@@ -700,6 +744,7 @@ static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
  static int ovl_lower_dir(const char *name, struct path *path,
                          struct ovl_fs *ofs, int *stack_depth, bool *remote)
  {
+       int fh_type;
         int err;
  
         err = ovl_mount_dir_noesc(name, path);
@@ -719,15 +764,19 @@ static int ovl_lower_dir(const char *name, struct path *path,
          * The inodes index feature and NFS export need to encode and decode
          * file handles, so they require that all layers support them.
          */
+       fh_type = ovl_can_decode_fh(path->dentry->d_sb);
         if ((ofs->config.nfs_export ||
-            (ofs->config.index && ofs->config.upperdir)) &&
-           !ovl_can_decode_fh(path->dentry->d_sb)) {
+            (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
                 ofs->config.index = false;
                 ofs->config.nfs_export = false;
                 pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
                         name);
         }
  
+       /* Check if lower fs has 32bit inode numbers */
+       if (fh_type != FILEID_INO32_GEN)
+               ofs->xino_bits = 0;
+
         return 0;
  
  out_put:
@@ -951,6 +1000,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
  {
         struct vfsmount *mnt = ofs->upper_mnt;
         struct dentry *temp;
+       int fh_type;
         int err;
  
         err = mnt_want_write(mnt);
@@ -1000,12 +1050,16 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
         }
  
         /* Check if upper/work fs supports file handles */
-       if (ofs->config.index &&
-           !ovl_can_decode_fh(ofs->workdir->d_sb)) {
+       fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
+       if (ofs->config.index && !fh_type) {
                 ofs->config.index = false;
                 pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
         }
  
+       /* Check if upper fs has 32bit inode numbers */
+       if (fh_type != FILEID_INO32_GEN)
+               ofs->xino_bits = 0;
+
         /* NFS export of r/w mount depends on index */
         if (ofs->config.nfs_export && !ofs->config.index) {
                 pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
@@ -1108,6 +1162,35 @@ out:
         return err;
  }
  
+/* Get a unique fsid for the layer */
+static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
+{
+       unsigned int i;
+       dev_t dev;
+       int err;
+
+       /* fsid 0 is reserved for upper fs even with non upper overlay */
+       if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
+               return 0;
+
+       for (i = 0; i < ofs->numlowerfs; i++) {
+               if (ofs->lower_fs[i].sb == sb)
+                       return i + 1;
+       }
+
+       err = get_anon_bdev(&dev);
+       if (err) {
+               pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
+               return err;
+       }
+
+       ofs->lower_fs[ofs->numlowerfs].sb = sb;
+       ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
+       ofs->numlowerfs++;
+
+       return ofs->numlowerfs;
+}
+
  static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
                                 unsigned int numlower)
  {
@@ -1119,23 +1202,27 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
                                     GFP_KERNEL);
         if (ofs->lower_layers == NULL)
                 goto out;
+
+       ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
+                               GFP_KERNEL);
+       if (ofs->lower_fs == NULL)
+               goto out;
+
         for (i = 0; i < numlower; i++) {
                 struct vfsmount *mnt;
-               dev_t dev;
+               int fsid;
  
-               err = get_anon_bdev(&dev);
-               if (err) {
-                       pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
+               err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb);
+               if (err < 0)
                         goto out;
-               }
  
                 mnt = clone_private_mount(&stack[i]);
                 err = PTR_ERR(mnt);
                 if (IS_ERR(mnt)) {
                         pr_err("overlayfs: failed to clone lowerpath\n");
-                       free_anon_bdev(dev);
                         goto out;
                 }
+
                 /*
                  * Make lower layers R/O.  That way fchmod/fchown on lower file
                  * will fail instead of modifying lower fs.
@@ -1143,16 +1230,41 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
                 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
  
                 ofs->lower_layers[ofs->numlower].mnt = mnt;
-               ofs->lower_layers[ofs->numlower].pseudo_dev = dev;
                 ofs->lower_layers[ofs->numlower].idx = i + 1;
+               ofs->lower_layers[ofs->numlower].fsid = fsid;
+               if (fsid) {
+                       ofs->lower_layers[ofs->numlower].fs =
+                               &ofs->lower_fs[fsid - 1];
+               }
                 ofs->numlower++;
+       }
+
+       /*
+        * When all layers on same fs, overlay can use real inode numbers.
+        * With mount option "xino=on", mounter declares that there are enough
+        * free high bits in underlying fs to hold the unique fsid.
+        * If overlayfs does encounter underlying inodes using the high xino
+        * bits reserved for fsid, it emits a warning and uses the original
+        * inode number.
+        */
+       if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) {
+               ofs->xino_bits = 0;
+               ofs->config.xino = OVL_XINO_OFF;
+       } else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) {
+               /*
+                * This is a roundup of number of bits needed for numlowerfs+1
+                * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for
+                * upper fs even with non upper overlay.
+                */
+               BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
+               ofs->xino_bits = ilog2(ofs->numlowerfs) + 1;
+       }
  
-               /* Check if all lower layers are on same sb */
-               if (i == 0)
-                       ofs->same_sb = mnt->mnt_sb;
-               else if (ofs->same_sb != mnt->mnt_sb)
-                       ofs->same_sb = NULL;
+       if (ofs->xino_bits) {
+               pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n",
+                       ofs->xino_bits);
         }
+
         err = 0;
  out:
         return err;
@@ -1263,6 +1375,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
  
         ofs->config.index = ovl_index_def;
         ofs->config.nfs_export = ovl_nfs_export_def;
+       ofs->config.xino = ovl_xino_def();
         err = ovl_parse_opt((char *) data, &ofs->config);
         if (err)
                 goto out_err;
@@ -1276,6 +1389,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
  
         sb->s_stack_depth = 0;
         sb->s_maxbytes = MAX_LFS_FILESIZE;
+       /* Assume underlaying fs uses 32bit inodes unless proven otherwise */
+       if (ofs->config.xino != OVL_XINO_OFF)
+               ofs->xino_bits = BITS_PER_LONG - 32;
+
         if (ofs->config.upperdir) {
                 if (!ofs->config.workdir) {
                         pr_err("overlayfs: missing 'workdir'\n");
@@ -1305,8 +1422,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
         /* If the upper fs is nonexistent, we mark overlayfs r/o too */
         if (!ofs->upper_mnt)
                 sb->s_flags |= SB_RDONLY;
-       else if (ofs->upper_mnt->mnt_sb != ofs->same_sb)
-               ofs->same_sb = NULL;
  
         if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
                 err = ovl_get_indexdir(ofs, oe, &upperpath);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c

index 930784a266230ba4c51974ac681307aa7c82fbad..6f1078028c66b4aced7c94f04959a754b293645c 100644 (file)
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -47,13 +47,29 @@ struct super_block *ovl_same_sb(struct super_block *sb)
  {
         struct ovl_fs *ofs = sb->s_fs_info;
  
-       return ofs->same_sb;
+       if (!ofs->numlowerfs)
+               return ofs->upper_mnt->mnt_sb;
+       else if (ofs->numlowerfs == 1 && !ofs->upper_mnt)
+               return ofs->lower_fs[0].sb;
+       else
+               return NULL;
  }
  
-bool ovl_can_decode_fh(struct super_block *sb)
+/*
+ * Check if underlying fs supports file handles and try to determine encoding
+ * type, in order to deduce maximum inode number used by fs.
+ *
+ * Return 0 if file handles are not supported.
+ * Return 1 (FILEID_INO32_GEN) if fs uses the default 32bit inode encoding.
+ * Return -1 if fs uses a non default encoding with unknown inode size.
+ */
+int ovl_can_decode_fh(struct super_block *sb)
  {
-       return (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
-               !uuid_is_null(&sb->s_uuid));
+       if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry ||
+           uuid_is_null(&sb->s_uuid))
+               return 0;
+
+       return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
  }
  
  struct dentry *ovl_indexdir(struct super_block *sb)
@@ -172,6 +188,13 @@ struct dentry *ovl_dentry_lower(struct dentry *dentry)
         return oe->numlower ? oe->lowerstack[0].dentry : NULL;
  }
  
+struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
+{
+       struct ovl_entry *oe = dentry->d_fsdata;
+
+       return oe->numlower ? oe->lowerstack[0].layer : NULL;
+}
+
  struct dentry *ovl_dentry_real(struct dentry *dentry)
  {
         return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
@@ -279,12 +302,16 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
  void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
                     struct dentry *lowerdentry)
  {
+       struct inode *realinode = d_inode(upperdentry ?: lowerdentry);
+
         if (upperdentry)
                 OVL_I(inode)->__upperdentry = upperdentry;
         if (lowerdentry)
                 OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
  
-       ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode);
+       ovl_copyattr(realinode, inode);
+       if (!inode->i_ino)
+               inode->i_ino = realinode->i_ino;
  }
  
  void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
@@ -299,6 +326,8 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
         smp_wmb();
         OVL_I(inode)->__upperdentry = upperdentry;
         if (inode_unhashed(inode)) {
+               if (!inode->i_ino)
+                       inode->i_ino = upperinode->i_ino;
                 inode->i_private = upperinode;
                 __insert_inode_hash(inode, (unsigned long) upperinode);
         }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 13 Apr 2018 23:55:41 +0000 (16:55 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 13 Apr 2018 23:55:41 +0000 (16:55 -0700)
Documentation/filesystems/overlayfs.txt		patch | blob | history
fs/exportfs/expfs.c		patch | blob | history
fs/overlayfs/Kconfig		patch | blob | history
fs/overlayfs/copy_up.c		patch | blob | history
fs/overlayfs/export.c		patch | blob | history
fs/overlayfs/inode.c		patch | blob | history
fs/overlayfs/namei.c		patch | blob | history
fs/overlayfs/overlayfs.h		patch | blob | history
fs/overlayfs/ovl_entry.h		patch | blob | history
fs/overlayfs/readdir.c		patch | blob | history
fs/overlayfs/super.c		patch | blob | history
fs/overlayfs/util.c		patch | blob | history