btrfs: move btrfs_raid_mindev_error values to btrfs_raid_attr table
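
The btrfs_raid_group[] and btrfs_raid_mindev_error[] lookup tables are removed
and their values become members of btrfs_raid_array (bg_flag and mindev_error),
next to the new raid_name string returned by get_raid_name(). The combined diff
below also removes the volume_mutex, replaces unset_balance_control() and
__cancel_balance() with reset_balance_state() (the BTRFS_FS_EXCL_OP bit is now
cleared by the callers), open-codes set_balance_control() at its two call
sites, and turns the balance_running atomic into the BTRFS_FS_BALANCE_RUNNING
fs_info flag.

For illustration only, a hypothetical helper (not part of the patch) showing
how a minimum-device check can read everything from the single table after
this change:

	/* Hypothetical sketch: per-profile data now lives in one table. */
	static int check_min_devs(enum btrfs_raid_types type, u64 num_devices)
	{
		const struct btrfs_raid_attr *attr = &btrfs_raid_array[type];

		if (num_devices < attr->devs_min)
			return attr->mindev_error;	/* 0: no dedicated error */
		return 0;
	}
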
[sfrench/cifs-2.6.git] fs/btrfs/volumes.c
index 488935e66779faf064fc71782dbd22f5104b225d..7b3b235cf2144b4df053f621ca45862c53a5e8c0 100644
@@ -40,6 +40,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 1,
                .devs_increment = 2,
                .ncopies        = 2,
+               .raid_name      = "raid10",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID10,
+               .mindev_error   = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
        },
        [BTRFS_RAID_RAID1] = {
                .sub_stripes    = 1,
@@ -49,6 +52,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 1,
                .devs_increment = 2,
                .ncopies        = 2,
+               .raid_name      = "raid1",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID1,
+               .mindev_error   = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
        },
        [BTRFS_RAID_DUP] = {
                .sub_stripes    = 1,
@@ -58,6 +64,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 0,
                .devs_increment = 1,
                .ncopies        = 2,
+               .raid_name      = "dup",
+               .bg_flag        = BTRFS_BLOCK_GROUP_DUP,
+               .mindev_error   = 0,
        },
        [BTRFS_RAID_RAID0] = {
                .sub_stripes    = 1,
@@ -67,6 +76,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 0,
                .devs_increment = 1,
                .ncopies        = 1,
+               .raid_name      = "raid0",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID0,
+               .mindev_error   = 0,
        },
        [BTRFS_RAID_SINGLE] = {
                .sub_stripes    = 1,
@@ -76,6 +88,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 0,
                .devs_increment = 1,
                .ncopies        = 1,
+               .raid_name      = "single",
+               .bg_flag        = 0,
+               .mindev_error   = 0,
        },
        [BTRFS_RAID_RAID5] = {
                .sub_stripes    = 1,
@@ -85,6 +100,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 1,
                .devs_increment = 1,
                .ncopies        = 2,
+               .raid_name      = "raid5",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID5,
+               .mindev_error   = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
        },
        [BTRFS_RAID_RAID6] = {
                .sub_stripes    = 1,
@@ -94,33 +112,19 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 2,
                .devs_increment = 1,
                .ncopies        = 3,
+               .raid_name      = "raid6",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID6,
+               .mindev_error   = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
        },
 };
 
-const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
-       [BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
-       [BTRFS_RAID_RAID1]  = BTRFS_BLOCK_GROUP_RAID1,
-       [BTRFS_RAID_DUP]    = BTRFS_BLOCK_GROUP_DUP,
-       [BTRFS_RAID_RAID0]  = BTRFS_BLOCK_GROUP_RAID0,
-       [BTRFS_RAID_SINGLE] = 0,
-       [BTRFS_RAID_RAID5]  = BTRFS_BLOCK_GROUP_RAID5,
-       [BTRFS_RAID_RAID6]  = BTRFS_BLOCK_GROUP_RAID6,
-};
+const char *get_raid_name(enum btrfs_raid_types type)
+{
+       if (type >= BTRFS_NR_RAID_TYPES)
+               return NULL;
 
-/*
- * Table to convert BTRFS_RAID_* to the error code if minimum number of devices
- * condition is not met. Zero means there's no corresponding
- * BTRFS_ERROR_DEV_*_NOT_MET value.
- */
-const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
-       [BTRFS_RAID_RAID10] = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
-       [BTRFS_RAID_RAID1]  = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
-       [BTRFS_RAID_DUP]    = 0,
-       [BTRFS_RAID_RAID0]  = 0,
-       [BTRFS_RAID_SINGLE] = 0,
-       [BTRFS_RAID_RAID5]  = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
-       [BTRFS_RAID_RAID6]  = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
-};
+       return btrfs_raid_array[type].raid_name;
+}
 
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
                                struct btrfs_fs_info *fs_info);
@@ -167,12 +171,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
  * may be used to exclude some operations from running concurrently without any
  * modifications to the list (see write_all_supers)
  *
- * volume_mutex
- * ------------
- * coarse lock owned by a mounted filesystem; used to exclude some operations
- * that cannot run in parallel and affect the higher-level properties of the
- * filesystem like: device add/deleting/resize/replace, or balance
- *
  * balance_mutex
  * -------------
  * protects balance structures (status, state) and context accessed from
@@ -1890,11 +1888,11 @@ static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
        } while (read_seqretry(&fs_info->profiles_lock, seq));
 
        for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
-               if (!(all_avail & btrfs_raid_group[i]))
+               if (!(all_avail & btrfs_raid_array[i].bg_flag))
                        continue;
 
                if (num_devices < btrfs_raid_array[i].devs_min) {
-                       int ret = btrfs_raid_mindev_error[i];
+                       int ret = btrfs_raid_array[i].mindev_error;
 
                        if (ret)
                                return ret;
@@ -1954,7 +1952,6 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
        u64 num_devices;
        int ret = 0;
 
-       mutex_lock(&fs_info->volume_mutex);
        mutex_lock(&uuid_mutex);
 
        num_devices = fs_devices->num_devices;
@@ -2068,7 +2065,6 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 
 out:
        mutex_unlock(&uuid_mutex);
-       mutex_unlock(&fs_info->volume_mutex);
        return ret;
 
 error_undo:
@@ -2220,10 +2216,6 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
                struct btrfs_device *tmp;
 
                devices = &fs_info->fs_devices->devices;
-               /*
-                * It is safe to read the devices since the volume_mutex
-                * is held by the caller.
-                */
                list_for_each_entry(tmp, devices, dev_list) {
                        if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
                                        &tmp->dev_state) && !tmp->bdev) {
@@ -3212,24 +3204,12 @@ static void update_balance_args(struct btrfs_balance_control *bctl)
 }
 
 /*
- * Should be called with both balance and volume mutexes held to
- * serialize other volume operations (add_dev/rm_dev/resize) with
- * restriper.  Same goes for unset_balance_control.
+ * Clear the balance status in fs_info and delete the balance item from disk.
  */
-static void set_balance_control(struct btrfs_balance_control *bctl)
-{
-       struct btrfs_fs_info *fs_info = bctl->fs_info;
-
-       BUG_ON(fs_info->balance_ctl);
-
-       spin_lock(&fs_info->balance_lock);
-       fs_info->balance_ctl = bctl;
-       spin_unlock(&fs_info->balance_lock);
-}
-
-static void unset_balance_control(struct btrfs_fs_info *fs_info)
+static void reset_balance_state(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_balance_control *bctl = fs_info->balance_ctl;
+       int ret;
 
        BUG_ON(!fs_info->balance_ctl);
 
@@ -3238,6 +3218,9 @@ static void unset_balance_control(struct btrfs_fs_info *fs_info)
        spin_unlock(&fs_info->balance_lock);
 
        kfree(bctl);
+       ret = del_balance_item(fs_info);
+       if (ret)
+               btrfs_handle_fs_error(fs_info, ret, NULL);
 }
 
 /*
@@ -3774,18 +3757,6 @@ static inline int balance_need_close(struct btrfs_fs_info *fs_info)
                 atomic_read(&fs_info->balance_cancel_req) == 0);
 }
 
-static void __cancel_balance(struct btrfs_fs_info *fs_info)
-{
-       int ret;
-
-       unset_balance_control(fs_info);
-       ret = del_balance_item(fs_info);
-       if (ret)
-               btrfs_handle_fs_error(fs_info, ret, NULL);
-
-       clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
-}
-
 /* Non-zero return value signifies invalidity */
 static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
                u64 allowed)
@@ -3796,7 +3767,7 @@ static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
 }
 
 /*
- * Should be called with both balance and volume mutexes held
+ * Should be called with balance mutex held
  */
 int btrfs_balance(struct btrfs_balance_control *bctl,
                  struct btrfs_ioctl_balance_args *bargs)
@@ -3917,7 +3888,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 
        if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
                BUG_ON(ret == -EEXIST);
-               set_balance_control(bctl);
+               BUG_ON(fs_info->balance_ctl);
+               spin_lock(&fs_info->balance_lock);
+               fs_info->balance_ctl = bctl;
+               spin_unlock(&fs_info->balance_lock);
        } else {
                BUG_ON(ret != -EEXIST);
                spin_lock(&fs_info->balance_lock);
@@ -3925,22 +3899,24 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
                spin_unlock(&fs_info->balance_lock);
        }
 
-       atomic_inc(&fs_info->balance_running);
+       ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+       set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
        mutex_unlock(&fs_info->balance_mutex);
 
        ret = __btrfs_balance(fs_info);
 
        mutex_lock(&fs_info->balance_mutex);
-       atomic_dec(&fs_info->balance_running);
+       clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
 
        if (bargs) {
                memset(bargs, 0, sizeof(*bargs));
-               update_ioctl_balance_args(fs_info, 0, bargs);
+               btrfs_update_ioctl_balance_args(fs_info, bargs);
        }
 
        if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
            balance_need_close(fs_info)) {
-               __cancel_balance(fs_info);
+               reset_balance_state(fs_info);
+               clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
        }
 
        wake_up(&fs_info->balance_wait_q);
@@ -3948,11 +3924,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        return ret;
 out:
        if (bctl->flags & BTRFS_BALANCE_RESUME)
-               __cancel_balance(fs_info);
-       else {
+               reset_balance_state(fs_info);
+       else
                kfree(bctl);
-               clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
-       }
+       clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+
        return ret;
 }
 
@@ -3961,16 +3937,12 @@ static int balance_kthread(void *data)
        struct btrfs_fs_info *fs_info = data;
        int ret = 0;
 
-       mutex_lock(&fs_info->volume_mutex);
        mutex_lock(&fs_info->balance_mutex);
-
        if (fs_info->balance_ctl) {
                btrfs_info(fs_info, "continuing balance");
                ret = btrfs_balance(fs_info->balance_ctl, NULL);
        }
-
        mutex_unlock(&fs_info->balance_mutex);
-       mutex_unlock(&fs_info->volume_mutex);
 
        return ret;
 }
@@ -3979,12 +3951,12 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
 {
        struct task_struct *tsk;
 
-       spin_lock(&fs_info->balance_lock);
+       mutex_lock(&fs_info->balance_mutex);
        if (!fs_info->balance_ctl) {
-               spin_unlock(&fs_info->balance_lock);
+               mutex_unlock(&fs_info->balance_mutex);
                return 0;
        }
-       spin_unlock(&fs_info->balance_lock);
+       mutex_unlock(&fs_info->balance_mutex);
 
        if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
                btrfs_info(fs_info, "force skipping balance");
@@ -4050,15 +4022,26 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
        btrfs_balance_sys(leaf, item, &disk_bargs);
        btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
 
-       WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
+       /*
+        * This should never happen, as the paused balance state is recovered
+        * during mount without any chance of other exclusive ops colliding.
+        *
+        * This gives the exclusive op status to balance and keeps it in paused
+        * state until user intervention (cancel or umount). If the ownership
+        * cannot be assigned, show a message but do not fail. The balance
+        * is in a paused state and must have fs_info::balance_ctl properly
+        * set up.
+        */
+       if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
+               btrfs_warn(fs_info,
+       "cannot set exclusive op status to balance, resume manually");
 
-       mutex_lock(&fs_info->volume_mutex);
        mutex_lock(&fs_info->balance_mutex);
-
-       set_balance_control(bctl);
-
+       BUG_ON(fs_info->balance_ctl);
+       spin_lock(&fs_info->balance_lock);
+       fs_info->balance_ctl = bctl;
+       spin_unlock(&fs_info->balance_lock);
        mutex_unlock(&fs_info->balance_mutex);
-       mutex_unlock(&fs_info->volume_mutex);
 out:
        btrfs_free_path(path);
        return ret;
@@ -4074,16 +4057,16 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
                return -ENOTCONN;
        }
 
-       if (atomic_read(&fs_info->balance_running)) {
+       if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
                atomic_inc(&fs_info->balance_pause_req);
                mutex_unlock(&fs_info->balance_mutex);
 
                wait_event(fs_info->balance_wait_q,
-                          atomic_read(&fs_info->balance_running) == 0);
+                          !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
 
                mutex_lock(&fs_info->balance_mutex);
                /* we are good with balance_ctl ripped off from under us */
-               BUG_ON(atomic_read(&fs_info->balance_running));
+               BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
                atomic_dec(&fs_info->balance_pause_req);
        } else {
                ret = -ENOTCONN;
@@ -4095,38 +4078,48 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
 
 int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
 {
-       if (sb_rdonly(fs_info->sb))
-               return -EROFS;
-
        mutex_lock(&fs_info->balance_mutex);
        if (!fs_info->balance_ctl) {
                mutex_unlock(&fs_info->balance_mutex);
                return -ENOTCONN;
        }
 
+       /*
+        * A paused balance with the item stored on disk can be resumed at
+        * mount time if the mount is read-write. Otherwise it's still paused
+        * and we must not allow cancelling as it deletes the item.
+        */
+       if (sb_rdonly(fs_info->sb)) {
+               mutex_unlock(&fs_info->balance_mutex);
+               return -EROFS;
+       }
+
        atomic_inc(&fs_info->balance_cancel_req);
        /*
         * if we are running just wait and return, balance item is
         * deleted in btrfs_balance in this case
         */
-       if (atomic_read(&fs_info->balance_running)) {
+       if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
                mutex_unlock(&fs_info->balance_mutex);
                wait_event(fs_info->balance_wait_q,
-                          atomic_read(&fs_info->balance_running) == 0);
+                          !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
                mutex_lock(&fs_info->balance_mutex);
        } else {
-               /* __cancel_balance needs volume_mutex */
                mutex_unlock(&fs_info->balance_mutex);
-               mutex_lock(&fs_info->volume_mutex);
+               /*
+                * Lock released to allow other waiters to continue; we'll
+                * reexamine the status.
+                */
                mutex_lock(&fs_info->balance_mutex);
 
-               if (fs_info->balance_ctl)
-                       __cancel_balance(fs_info);
-
-               mutex_unlock(&fs_info->volume_mutex);
+               if (fs_info->balance_ctl) {
+                       reset_balance_state(fs_info);
+                       clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+               }
        }
 
-       BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
+       BUG_ON(fs_info->balance_ctl ||
+               test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
        atomic_dec(&fs_info->balance_cancel_req);
        mutex_unlock(&fs_info->balance_mutex);
        return 0;