btrfs: use kvzalloc to allocate btrfs_fs_info
[sfrench/cifs-2.6.git] / fs / btrfs / super.c
index 3a4dce1536455416a4f609f99ec416b61811f592..4b817947e00f39e327a244f67bbde7ebf2d6b7e0 100644 (file)
 #include "tests/btrfs-tests.h"
 
 #include "qgroup.h"
-#include "backref.h"
 #define CREATE_TRACE_POINTS
 #include <trace/events/btrfs.h>
 
 static const struct super_operations btrfs_super_ops;
+
+/*
+ * Types for mounting the default subvolume and a subvolume explicitly
+ * requested by subvol=/path. That way the callchain is straightforward and we
+ * don't have to play tricks with the mount options and recursive calls to
+ * btrfs_mount.
+ *
+ * The new btrfs_root_fs_type also serves as a tag for the bdev_holder.
+ */
 static struct file_system_type btrfs_fs_type;
+static struct file_system_type btrfs_root_fs_type;
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data);
 
@@ -98,30 +107,6 @@ const char *btrfs_decode_error(int errno)
        return errstr;
 }
 
-/* btrfs handle error by forcing the filesystem readonly */
-static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
-{
-       struct super_block *sb = fs_info->sb;
-
-       if (sb_rdonly(sb))
-               return;
-
-       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
-               sb->s_flags |= SB_RDONLY;
-               btrfs_info(fs_info, "forced readonly");
-               /*
-                * Note that a running device replace operation is not
-                * canceled here although there is no way to update
-                * the progress. It would add the risk of a deadlock,
-                * therefore the canceling is omitted. The only penalty
-                * is that some I/O remains active until the procedure
-                * completes. The next time when the filesystem is
-                * mounted writeable again, the device replace
-                * operation continues.
-                */
-       }
-}
-
 /*
  * __btrfs_handle_fs_error decodes expected errors from the caller and
  * invokes the approciate error response.
@@ -168,8 +153,23 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
        set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
 
        /* Don't go through full error handling during mount */
-       if (sb->s_flags & SB_BORN)
-               btrfs_handle_error(fs_info);
+       if (!(sb->s_flags & SB_BORN))
+               return;
+
+       if (sb_rdonly(sb))
+               return;
+
+       /* btrfs handle error by forcing the filesystem readonly */
+       sb->s_flags |= SB_RDONLY;
+       btrfs_info(fs_info, "forced readonly");
+       /*
+        * Note that a running device replace operation is not canceled here
+        * although there is no way to update the progress. It would add the
+        * risk of a deadlock, therefore the canceling is omitted. The only
+        * penalty is that some I/O remains active until the procedure
+        * completes. The next time when the filesystem is mounted writeable
+        * again, the device replace operation continues.
+        */
 }
 
 #ifdef CONFIG_PRINTK
@@ -405,7 +405,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                        unsigned long new_flags)
 {
        substring_t args[MAX_OPT_ARGS];
-       char *p, *num, *orig = NULL;
+       char *p, *num;
        u64 cache_gen;
        int intarg;
        int ret = 0;
@@ -428,16 +428,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
        if (!options)
                goto check;
 
-       /*
-        * strsep changes the string, duplicate it because parse_options
-        * gets called twice
-        */
-       options = kstrdup(options, GFP_KERNEL);
-       if (!options)
-               return -ENOMEM;
-
-       orig = options;
-
        while ((p = strsep(&options, ",")) != NULL) {
                int token;
                if (!*p)
@@ -454,7 +444,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                case Opt_subvolrootid:
                case Opt_device:
                        /*
-                        * These are parsed by btrfs_parse_early_options
+                        * These are parsed by btrfs_parse_subvol_options
+                        * and btrfs_parse_early_options
                         * and can be happily ignored here.
                         */
                        break;
@@ -877,7 +868,6 @@ out:
                btrfs_info(info, "disk space caching is enabled");
        if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
                btrfs_info(info, "using free space tree");
-       kfree(orig);
        return ret;
 }
 
@@ -888,11 +878,60 @@ out:
  * only when we need to allocate a new super block.
  */
 static int btrfs_parse_early_options(const char *options, fmode_t flags,
-               void *holder, char **subvol_name, u64 *subvol_objectid,
-               struct btrfs_fs_devices **fs_devices)
+               void *holder, struct btrfs_fs_devices **fs_devices)
 {
        substring_t args[MAX_OPT_ARGS];
        char *device_name, *opts, *orig, *p;
+       int error = 0;
+
+       if (!options)
+               return 0;
+
+       /*
+        * strsep changes the string, duplicate it because btrfs_parse_options
+        * gets called later
+        */
+       opts = kstrdup(options, GFP_KERNEL);
+       if (!opts)
+               return -ENOMEM;
+       orig = opts;
+
+       while ((p = strsep(&opts, ",")) != NULL) {
+               int token;
+
+               if (!*p)
+                       continue;
+
+               token = match_token(p, tokens, args);
+               if (token == Opt_device) {
+                       device_name = match_strdup(&args[0]);
+                       if (!device_name) {
+                               error = -ENOMEM;
+                               goto out;
+                       }
+                       error = btrfs_scan_one_device(device_name,
+                                       flags, holder, fs_devices);
+                       kfree(device_name);
+                       if (error)
+                               goto out;
+               }
+       }
+
+out:
+       kfree(orig);
+       return error;
+}
+
+/*
+ * Parse mount options that are related to subvolume id
+ *
+ * The value is later passed to mount_subvol()
+ */
+static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
+               char **subvol_name, u64 *subvol_objectid)
+{
+       substring_t args[MAX_OPT_ARGS];
+       char *opts, *orig, *p;
        char *num = NULL;
        int error = 0;
 
@@ -900,8 +939,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
                return 0;
 
        /*
-        * strsep changes the string, duplicate it because parse_options
-        * gets called twice
+        * strsep changes the string, duplicate it because
+        * btrfs_parse_early_options gets called later
         */
        opts = kstrdup(options, GFP_KERNEL);
        if (!opts)
@@ -940,18 +979,6 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
                case Opt_subvolrootid:
                        pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
                        break;
-               case Opt_device:
-                       device_name = match_strdup(&args[0]);
-                       if (!device_name) {
-                               error = -ENOMEM;
-                               goto out;
-                       }
-                       error = btrfs_scan_one_device(device_name,
-                                       flags, holder, fs_devices);
-                       kfree(device_name);
-                       if (error)
-                               goto out;
-                       break;
                default:
                        break;
                }
@@ -1243,7 +1270,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 {
        struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
-       char *compress_type;
+       const char *compress_type;
 
        if (btrfs_test_opt(info, DEGRADED))
                seq_puts(seq, ",degraded");
@@ -1259,12 +1286,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
                                             num_online_cpus() + 2, 8))
                seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
        if (btrfs_test_opt(info, COMPRESS)) {
-               if (info->compress_type == BTRFS_COMPRESS_ZLIB)
-                       compress_type = "zlib";
-               else if (info->compress_type == BTRFS_COMPRESS_LZO)
-                       compress_type = "lzo";
-               else
-                       compress_type = "zstd";
+               compress_type = btrfs_compress_type2str(info->compress_type);
                if (btrfs_test_opt(info, FORCE_COMPRESS))
                        seq_printf(seq, ",compress-force=%s", compress_type);
                else
@@ -1365,86 +1387,12 @@ static inline int is_subvolume_inode(struct inode *inode)
        return 0;
 }
 
-/*
- * This will add subvolid=0 to the argument string while removing any subvol=
- * and subvolid= arguments to make sure we get the top-level root for path
- * walking to the subvol we want.
- */
-static char *setup_root_args(char *args)
-{
-       char *buf, *dst, *sep;
-
-       if (!args)
-               return kstrdup("subvolid=0", GFP_KERNEL);
-
-       /* The worst case is that we add ",subvolid=0" to the end. */
-       buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1,
-                       GFP_KERNEL);
-       if (!buf)
-               return NULL;
-
-       while (1) {
-               sep = strchrnul(args, ',');
-               if (!strstarts(args, "subvol=") &&
-                   !strstarts(args, "subvolid=")) {
-                       memcpy(dst, args, sep - args);
-                       dst += sep - args;
-                       *dst++ = ',';
-               }
-               if (*sep)
-                       args = sep + 1;
-               else
-                       break;
-       }
-       strcpy(dst, "subvolid=0");
-
-       return buf;
-}
-
 static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
-                                  int flags, const char *device_name,
-                                  char *data)
+                                  const char *device_name, struct vfsmount *mnt)
 {
        struct dentry *root;
-       struct vfsmount *mnt = NULL;
-       char *newargs;
        int ret;
 
-       newargs = setup_root_args(data);
-       if (!newargs) {
-               root = ERR_PTR(-ENOMEM);
-               goto out;
-       }
-
-       mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
-       if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
-               if (flags & SB_RDONLY) {
-                       mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY,
-                                            device_name, newargs);
-               } else {
-                       mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY,
-                                            device_name, newargs);
-                       if (IS_ERR(mnt)) {
-                               root = ERR_CAST(mnt);
-                               mnt = NULL;
-                               goto out;
-                       }
-
-                       down_write(&mnt->mnt_sb->s_umount);
-                       ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
-                       up_write(&mnt->mnt_sb->s_umount);
-                       if (ret < 0) {
-                               root = ERR_PTR(ret);
-                               goto out;
-                       }
-               }
-       }
-       if (IS_ERR(mnt)) {
-               root = ERR_CAST(mnt);
-               mnt = NULL;
-               goto out;
-       }
-
        if (!subvol_name) {
                if (!subvol_objectid) {
                        ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
@@ -1500,7 +1448,6 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
 
 out:
        mntput(mnt);
-       kfree(newargs);
        kfree(subvol_name);
        return root;
 }
@@ -1558,11 +1505,11 @@ static int setup_security_options(struct btrfs_fs_info *fs_info,
 /*
  * Find a superblock for the given device / mount point.
  *
- * Note:  This is based on get_sb_bdev from fs/super.c with a few additions
- *       for multiple device setup.  Make sure to keep it in sync.
+ * Note: This is based on mount_bdev from fs/super.c with a few additions
+ *       for multiple device setup.  Make sure to keep it in sync.
  */
-static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
-               const char *device_name, void *data)
+static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
+               int flags, const char *device_name, void *data)
 {
        struct block_device *bdev = NULL;
        struct super_block *s;
@@ -1570,27 +1517,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
        struct btrfs_fs_info *fs_info = NULL;
        struct security_mnt_opts new_sec_opts;
        fmode_t mode = FMODE_READ;
-       char *subvol_name = NULL;
-       u64 subvol_objectid = 0;
        int error = 0;
 
        if (!(flags & SB_RDONLY))
                mode |= FMODE_WRITE;
 
        error = btrfs_parse_early_options(data, mode, fs_type,
-                                         &subvol_name, &subvol_objectid,
                                          &fs_devices);
        if (error) {
-               kfree(subvol_name);
                return ERR_PTR(error);
        }
 
-       if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
-               /* mount_subvol() will free subvol_name. */
-               return mount_subvol(subvol_name, subvol_objectid, flags,
-                                   device_name, data);
-       }
-
        security_init_mnt_opts(&new_sec_opts);
        if (data) {
                error = parse_security_options(data, &new_sec_opts);
@@ -1608,7 +1545,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
         * it for searching for existing supers, so this lets us do that and
         * then open_ctree will properly initialize everything later.
         */
-       fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
+       fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
        if (!fs_info) {
                error = -ENOMEM;
                goto error_sec_opts;
@@ -1674,6 +1611,84 @@ error_sec_opts:
        return ERR_PTR(error);
 }
 
+/*
+ * Mount function which is called by VFS layer.
+ *
+ * In order to allow mounting a subvolume directly, btrfs uses mount_subtree()
+ * which needs vfsmount* of device's root (/).  This means device's root has to
+ * be mounted internally in any case.
+ *
+ * Operation flow:
+ *   1. Parse subvol id related options for later use in mount_subvol().
+ *
+ *   2. Mount device's root (/) by calling vfs_kern_mount().
+ *
+ *      NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
+ *      first place. In order to avoid calling btrfs_mount() again, we use
+ *      different file_system_type which is not registered to VFS by
+ *      register_filesystem() (btrfs_root_fs_type). As a result,
+ *      btrfs_mount_root() is called. The return value will be used by
+ *      mount_subtree() in mount_subvol().
+ *
+ *   3. Call mount_subvol() to get the dentry of subvolume. Since there is
+ *      "btrfs subvolume set-default", mount_subvol() is called always.
+ *
+ * Ownership: subvol_name is filled in by btrfs_parse_subvol_options() and is
+ * handed over to mount_subvol(), which frees it and drops mnt_root.  Every
+ * path that bails out before that hand-over has to free subvol_name here.
+ *
+ * Returns the dentry obtained from mount_subvol(), or an ERR_PTR() when
+ * option parsing, mounting the device's root or the remount fails.
+ */
+static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
+               const char *device_name, void *data)
+{
+       struct vfsmount *mnt_root;
+       struct dentry *root;
+       fmode_t mode = FMODE_READ;
+       char *subvol_name = NULL;
+       u64 subvol_objectid = 0;
+       int error = 0;
+
+       if (!(flags & SB_RDONLY))
+               mode |= FMODE_WRITE;
+
+       error = btrfs_parse_subvol_options(data, mode,
+                                         &subvol_name, &subvol_objectid);
+       if (error) {
+               root = ERR_PTR(error);
+               goto out_free_name;
+       }
+
+       /* mount device's root (/) */
+       mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
+       if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
+               /* Retry the mount with the SB_RDONLY bit flipped */
+               if (flags & SB_RDONLY) {
+                       mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
+                               flags & ~SB_RDONLY, device_name, data);
+               } else {
+                       mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
+                               flags | SB_RDONLY, device_name, data);
+                       if (IS_ERR(mnt_root)) {
+                               root = ERR_CAST(mnt_root);
+                               goto out_free_name;
+                       }
+
+                       /*
+                        * The read-only retry succeeded, remount with the
+                        * flags the caller originally asked for.
+                        */
+                       down_write(&mnt_root->mnt_sb->s_umount);
+                       error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
+                       up_write(&mnt_root->mnt_sb->s_umount);
+                       if (error < 0) {
+                               root = ERR_PTR(error);
+                               mntput(mnt_root);
+                               goto out_free_name;
+                       }
+               }
+       }
+       if (IS_ERR(mnt_root)) {
+               root = ERR_CAST(mnt_root);
+               goto out_free_name;
+       }
+
+       /* mount_subvol() will free subvol_name and mnt_root */
+       return mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root);
+
+out_free_name:
+       /* mount_subvol() was never reached, so the name is still ours to free */
+       kfree(subvol_name);
+       return root;
+}
+
 static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
                                     int new_pool_size, int old_pool_size)
 {
@@ -1820,7 +1835,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
                        goto restore;
                }
 
-               if (!btrfs_check_rw_degradable(fs_info)) {
+               if (!btrfs_check_rw_degradable(fs_info, NULL)) {
                        btrfs_warn(fs_info,
                                "too many missing devices, writeable remount is not allowed");
                        ret = -EACCES;
@@ -1972,8 +1987,10 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
 
        rcu_read_lock();
        list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
-               if (!device->in_fs_metadata || !device->bdev ||
-                   device->is_tgtdev_for_dev_replace)
+               if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
+                                               &device->dev_state) ||
+                   !device->bdev ||
+                   test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
                        continue;
 
                if (i >= nr_devices)
@@ -2174,6 +2191,15 @@ static struct file_system_type btrfs_fs_type = {
        .kill_sb        = btrfs_kill_super,
        .fs_flags       = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
 };
+
+/*
+ * File system type that btrfs_mount() hands to vfs_kern_mount() so that the
+ * device's root is mounted through btrfs_mount_root().  It is also passed as
+ * the holder argument to btrfs_scan_one_device() by the control ioctls.
+ */
+static struct file_system_type btrfs_root_fs_type = {
+       .owner          = THIS_MODULE,
+       .name           = "btrfs",
+       .mount          = btrfs_mount_root,
+       .kill_sb        = btrfs_kill_super,
+       .fs_flags       = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
+};
+
 MODULE_ALIAS_FS("btrfs");
 
 static int btrfs_control_open(struct inode *inode, struct file *file)
@@ -2207,11 +2233,11 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
        switch (cmd) {
        case BTRFS_IOC_SCAN_DEV:
                ret = btrfs_scan_one_device(vol->name, FMODE_READ,
-                                           &btrfs_fs_type, &fs_devices);
+                                           &btrfs_root_fs_type, &fs_devices);
                break;
        case BTRFS_IOC_DEVICES_READY:
                ret = btrfs_scan_one_device(vol->name, FMODE_READ,
-                                           &btrfs_fs_type, &fs_devices);
+                                           &btrfs_root_fs_type, &fs_devices);
                if (ret)
                        break;
                ret = !(fs_devices->num_devices == fs_devices->total_devices);
@@ -2269,7 +2295,7 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
        while (cur_devices) {
                head = &cur_devices->devices;
                list_for_each_entry(dev, head, dev_list) {
-                       if (dev->missing)
+                       if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
                                continue;
                        if (!dev->name)
                                continue;
@@ -2324,7 +2350,7 @@ static struct miscdevice btrfs_misc = {
 MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
 MODULE_ALIAS("devname:btrfs-control");
 
-static int btrfs_interface_init(void)
+static int __init btrfs_interface_init(void)
 {
        return misc_register(&btrfs_misc);
 }
@@ -2334,7 +2360,7 @@ static void btrfs_interface_exit(void)
        misc_deregister(&btrfs_misc);
 }
 
-static void btrfs_print_mod_info(void)
+static void __init btrfs_print_mod_info(void)
 {
        pr_info("Btrfs loaded, crc32c=%s"
 #ifdef CONFIG_BTRFS_DEBUG