btrfs: use existing cur_devices, cleanup btrfs_rm_device
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 07706c0a5781045d7310ed3a4eae9648831adbdc..a382d53c560af673acb3424de1620dd92ac6ab27 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -40,6 +40,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 1,
                .devs_increment = 2,
                .ncopies        = 2,
+               .raid_name      = "raid10",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID10,
+               .mindev_error   = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
        },
        [BTRFS_RAID_RAID1] = {
                .sub_stripes    = 1,
@@ -49,6 +52,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 1,
                .devs_increment = 2,
                .ncopies        = 2,
+               .raid_name      = "raid1",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID1,
+               .mindev_error   = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
        },
        [BTRFS_RAID_DUP] = {
                .sub_stripes    = 1,
@@ -58,6 +64,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 0,
                .devs_increment = 1,
                .ncopies        = 2,
+               .raid_name      = "dup",
+               .bg_flag        = BTRFS_BLOCK_GROUP_DUP,
+               .mindev_error   = 0,
        },
        [BTRFS_RAID_RAID0] = {
                .sub_stripes    = 1,
@@ -67,6 +76,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 0,
                .devs_increment = 1,
                .ncopies        = 1,
+               .raid_name      = "raid0",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID0,
+               .mindev_error   = 0,
        },
        [BTRFS_RAID_SINGLE] = {
                .sub_stripes    = 1,
@@ -76,6 +88,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 0,
                .devs_increment = 1,
                .ncopies        = 1,
+               .raid_name      = "single",
+               .bg_flag        = 0,
+               .mindev_error   = 0,
        },
        [BTRFS_RAID_RAID5] = {
                .sub_stripes    = 1,
@@ -85,6 +100,9 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 1,
                .devs_increment = 1,
                .ncopies        = 2,
+               .raid_name      = "raid5",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID5,
+               .mindev_error   = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
        },
        [BTRFS_RAID_RAID6] = {
                .sub_stripes    = 1,
@@ -94,33 +112,19 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 2,
                .devs_increment = 1,
                .ncopies        = 3,
+               .raid_name      = "raid6",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID6,
+               .mindev_error   = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
        },
 };
 
-const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
-       [BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
-       [BTRFS_RAID_RAID1]  = BTRFS_BLOCK_GROUP_RAID1,
-       [BTRFS_RAID_DUP]    = BTRFS_BLOCK_GROUP_DUP,
-       [BTRFS_RAID_RAID0]  = BTRFS_BLOCK_GROUP_RAID0,
-       [BTRFS_RAID_SINGLE] = 0,
-       [BTRFS_RAID_RAID5]  = BTRFS_BLOCK_GROUP_RAID5,
-       [BTRFS_RAID_RAID6]  = BTRFS_BLOCK_GROUP_RAID6,
-};
+const char *get_raid_name(enum btrfs_raid_types type)
+{
+       if (type >= BTRFS_NR_RAID_TYPES)
+               return NULL;
 
-/*
- * Table to convert BTRFS_RAID_* to the error code if minimum number of devices
- * condition is not met. Zero means there's no corresponding
- * BTRFS_ERROR_DEV_*_NOT_MET value.
- */
-const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
-       [BTRFS_RAID_RAID10] = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
-       [BTRFS_RAID_RAID1]  = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
-       [BTRFS_RAID_DUP]    = 0,
-       [BTRFS_RAID_RAID0]  = 0,
-       [BTRFS_RAID_SINGLE] = 0,
-       [BTRFS_RAID_RAID5]  = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
-       [BTRFS_RAID_RAID6]  = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
-};
+       return btrfs_raid_array[type].raid_name;
+}
 
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
                                struct btrfs_fs_info *fs_info);
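
The three parallel lookup tables (btrfs_raid_group, btrfs_raid_mindev_error,
and the raid name strings) collapse into per-type members of btrfs_raid_array,
and get_raid_name() becomes a bounds-checked accessor. A minimal userspace
sketch of the same table-driven pattern; the struct layout and the values
below are illustrative, not the kernel's exact definition:

    #include <stdio.h>

    /* Illustrative subset of btrfs_raid_attr; the real struct in
     * fs/btrfs/volumes.h also carries devs_min, ncopies, etc. */
    struct raid_attr {
        const char *raid_name;       /* printable profile name */
        unsigned long long bg_flag;  /* BTRFS_BLOCK_GROUP_* bit, 0 for single */
        int mindev_error;            /* error when devs_min is unmet, 0 if none */
    };

    enum raid_types { RAID_RAID1, RAID_SINGLE, NR_RAID_TYPES };

    static const struct raid_attr raid_array[NR_RAID_TYPES] = {
        [RAID_RAID1]  = { "raid1",  1ULL << 4, -1 },  /* made-up values */
        [RAID_SINGLE] = { "single", 0,          0 },
    };

    /* Same shape as the kernel's get_raid_name(): bounds check, then index. */
    static const char *get_raid_name(enum raid_types type)
    {
        if (type >= NR_RAID_TYPES)
            return NULL;
        return raid_array[type].raid_name;
    }

    int main(void)
    {
        printf("%s\n", get_raid_name(RAID_RAID1)); /* prints "raid1" */
        return 0;
    }
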
@@ -167,12 +171,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
  * may be used to exclude some operations from running concurrently without any
  * modifications to the list (see write_all_supers)
  *
- * volume_mutex
- * ------------
- * coarse lock owned by a mounted filesystem; used to exclude some operations
- * that cannot run in parallel and affect the higher-level properties of the
- * filesystem like: device add/deleting/resize/replace, or balance
- *
  * balance_mutex
  * -------------
  * protects balance structures (status, state) and context accessed from
@@ -1234,31 +1232,29 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
         */
        bytenr = btrfs_sb_offset(0);
        flags |= FMODE_EXCL;
-       mutex_lock(&uuid_mutex);
 
        bdev = blkdev_get_by_path(path, flags, holder);
-       if (IS_ERR(bdev)) {
-               ret = PTR_ERR(bdev);
-               goto error;
-       }
+       if (IS_ERR(bdev))
+               return PTR_ERR(bdev);
 
        if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
                ret = -EINVAL;
                goto error_bdev_put;
        }
 
+       mutex_lock(&uuid_mutex);
        device = device_list_add(path, disk_super);
        if (IS_ERR(device))
                ret = PTR_ERR(device);
        else
                *fs_devices_ret = device->fs_devices;
+       mutex_unlock(&uuid_mutex);
 
        btrfs_release_disk_super(page);
 
 error_bdev_put:
        blkdev_put(bdev, flags);
-error:
-       mutex_unlock(&uuid_mutex);
+
        return ret;
 }
 
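With this change the uuid_mutex critical section shrinks to just
device_list_add(): opening the block device and reading the super block no
longer serialize against the global lock. The resulting flow, sketched with
error handling elided (not the verbatim function body):

    bdev = blkdev_get_by_path(path, flags, holder);           /* unlocked */
    btrfs_read_disk_super(bdev, bytenr, &page, &disk_super);  /* unlocked */

    mutex_lock(&uuid_mutex);
    device = device_list_add(path, disk_super);  /* the only step under the lock */
    mutex_unlock(&uuid_mutex);

    btrfs_release_disk_super(page);
    blkdev_put(bdev, flags);
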
@@ -1890,11 +1886,11 @@ static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
        } while (read_seqretry(&fs_info->profiles_lock, seq));
 
        for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
-               if (!(all_avail & btrfs_raid_group[i]))
+               if (!(all_avail & btrfs_raid_array[i].bg_flag))
                        continue;
 
                if (num_devices < btrfs_raid_array[i].devs_min) {
-                       int ret = btrfs_raid_mindev_error[i];
+                       int ret = btrfs_raid_array[i].mindev_error;
 
                        if (ret)
                                return ret;
@@ -2019,20 +2015,25 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
         * (super_copy) should hold the device list mutex.
         */
 
+       /*
+        * In normal cases cur_devices == fs_devices. But when deleting
+        * a seed device, cur_devices should point to the seed's own
+        * fs_devices, found on the fs_devices->seed list.
+        */
        cur_devices = device->fs_devices;
        mutex_lock(&fs_devices->device_list_mutex);
        list_del_rcu(&device->dev_list);
 
-       device->fs_devices->num_devices--;
-       device->fs_devices->total_devices--;
+       cur_devices->num_devices--;
+       cur_devices->total_devices--;
 
        if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
-               device->fs_devices->missing_devices--;
+               cur_devices->missing_devices--;
 
        btrfs_assign_next_active_device(fs_info, device, NULL);
 
        if (device->bdev) {
-               device->fs_devices->open_devices--;
+               cur_devices->open_devices--;
                /* remove sysfs entry */
                btrfs_sysfs_rm_device_link(fs_devices, device);
        }
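
The cur_devices comment is easiest to picture as a chain: each sprouted
filesystem keeps its seed's device list reachable through ->seed, and a
device belongs to exactly one list. A rough sketch of the relationship
(simplified; the real linkage lives in struct btrfs_fs_devices):

    /*
     * fs_devices (mounted fs) --seed--> seed fs_devices --seed--> ...
     *
     * device->fs_devices always points at the list that owns the device;
     * for a seed device that is one of the ->seed entries, which is why
     * the counters above are decremented on cur_devices rather than on
     * the top-level fs_devices.
     */
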
@@ -2218,10 +2219,6 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
                struct btrfs_device *tmp;
 
                devices = &fs_info->fs_devices->devices;
-               /*
-                * It is safe to read the devices since the volume_mutex
-                * is held by the caller.
-                */
                list_for_each_entry(tmp, devices, dev_list) {
                        if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
                                        &tmp->dev_state) && !tmp->bdev) {
@@ -3209,22 +3206,6 @@ static void update_balance_args(struct btrfs_balance_control *bctl)
        }
 }
 
-/*
- * Should be called with both balance and volume mutexes held to
- * serialize other volume operations (add_dev/rm_dev/resize) with
- * restriper.  Same goes for reset_balance_state.
- */
-static void set_balance_control(struct btrfs_balance_control *bctl)
-{
-       struct btrfs_fs_info *fs_info = bctl->fs_info;
-
-       BUG_ON(fs_info->balance_ctl);
-
-       spin_lock(&fs_info->balance_lock);
-       fs_info->balance_ctl = bctl;
-       spin_unlock(&fs_info->balance_lock);
-}
-
 /*
  * Clear the balance status in fs_info and delete the balance item from disk.
  */
@@ -3789,12 +3770,12 @@ static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
 }
 
 /*
- * Should be called with both balance and volume mutexes held
+ * Should be called with the balance mutex held
  */
-int btrfs_balance(struct btrfs_balance_control *bctl,
+int btrfs_balance(struct btrfs_fs_info *fs_info,
+                 struct btrfs_balance_control *bctl,
                  struct btrfs_ioctl_balance_args *bargs)
 {
-       struct btrfs_fs_info *fs_info = bctl->fs_info;
        u64 meta_target, data_target;
        u64 allowed;
        int mixed = 0;
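
Callers now pass fs_info explicitly instead of btrfs_balance() reaching
through bctl->fs_info (the back-pointer assignment is dropped from the
recovery path in a later hunk). A caller sketch, assuming the usual
allocation done by the balance ioctl:

    struct btrfs_balance_control *bctl;

    bctl = kzalloc(sizeof(*bctl), GFP_KERNEL);
    if (!bctl)
        return -ENOMEM;
    /* ... fill bctl->data / bctl->meta / bctl->sys and bctl->flags ... */
    ret = btrfs_balance(fs_info, bctl, bargs);
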
@@ -3910,7 +3891,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 
        if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
                BUG_ON(ret == -EEXIST);
-               set_balance_control(bctl);
+               BUG_ON(fs_info->balance_ctl);
+               spin_lock(&fs_info->balance_lock);
+               fs_info->balance_ctl = bctl;
+               spin_unlock(&fs_info->balance_lock);
        } else {
                BUG_ON(ret != -EEXIST);
                spin_lock(&fs_info->balance_lock);
@@ -3918,17 +3902,18 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
                spin_unlock(&fs_info->balance_lock);
        }
 
-       atomic_inc(&fs_info->balance_running);
+       ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+       set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
        mutex_unlock(&fs_info->balance_mutex);
 
        ret = __btrfs_balance(fs_info);
 
        mutex_lock(&fs_info->balance_mutex);
-       atomic_dec(&fs_info->balance_running);
+       clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
 
        if (bargs) {
                memset(bargs, 0, sizeof(*bargs));
-               update_ioctl_balance_args(fs_info, 0, bargs);
+               btrfs_update_ioctl_balance_args(fs_info, bargs);
        }
 
        if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
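
A single running/not-running state does not need an atomic counter, so
balance_running becomes the BTRFS_FS_BALANCE_RUNNING bit in fs_info->flags,
paired with the existing wait queue. The pattern, sketched; the wake_up()
side is assumed to live elsewhere in volumes.c:

    /* balance side */
    set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
    /* ... __btrfs_balance() runs with balance_mutex dropped ... */
    clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
    wake_up(&fs_info->balance_wait_q);  /* assumed: wakes pause/cancel waiters */

    /* pause/cancel side */
    wait_event(fs_info->balance_wait_q,
               !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
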
@@ -3955,16 +3940,12 @@ static int balance_kthread(void *data)
        struct btrfs_fs_info *fs_info = data;
        int ret = 0;
 
-       mutex_lock(&fs_info->volume_mutex);
        mutex_lock(&fs_info->balance_mutex);
-
        if (fs_info->balance_ctl) {
                btrfs_info(fs_info, "continuing balance");
-               ret = btrfs_balance(fs_info->balance_ctl, NULL);
+               ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
        }
-
        mutex_unlock(&fs_info->balance_mutex);
-       mutex_unlock(&fs_info->volume_mutex);
 
        return ret;
 }
@@ -3973,12 +3954,12 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
 {
        struct task_struct *tsk;
 
-       spin_lock(&fs_info->balance_lock);
+       mutex_lock(&fs_info->balance_mutex);
        if (!fs_info->balance_ctl) {
-               spin_unlock(&fs_info->balance_lock);
+               mutex_unlock(&fs_info->balance_mutex);
                return 0;
        }
-       spin_unlock(&fs_info->balance_lock);
+       mutex_unlock(&fs_info->balance_mutex);
 
        if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
                btrfs_info(fs_info, "force skipping balance");
@@ -4033,7 +4014,6 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
        leaf = path->nodes[0];
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
 
-       bctl->fs_info = fs_info;
        bctl->flags = btrfs_balance_flags(leaf, item);
        bctl->flags |= BTRFS_BALANCE_RESUME;
 
@@ -4058,13 +4038,12 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
                btrfs_warn(fs_info,
        "cannot set exclusive op status to balance, resume manually");
 
-       mutex_lock(&fs_info->volume_mutex);
        mutex_lock(&fs_info->balance_mutex);
-
-       set_balance_control(bctl);
-
+       BUG_ON(fs_info->balance_ctl);
+       spin_lock(&fs_info->balance_lock);
+       fs_info->balance_ctl = bctl;
+       spin_unlock(&fs_info->balance_lock);
        mutex_unlock(&fs_info->balance_mutex);
-       mutex_unlock(&fs_info->volume_mutex);
 out:
        btrfs_free_path(path);
        return ret;
@@ -4080,16 +4059,16 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
                return -ENOTCONN;
        }
 
-       if (atomic_read(&fs_info->balance_running)) {
+       if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
                atomic_inc(&fs_info->balance_pause_req);
                mutex_unlock(&fs_info->balance_mutex);
 
                wait_event(fs_info->balance_wait_q,
-                          atomic_read(&fs_info->balance_running) == 0);
+                          !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
 
                mutex_lock(&fs_info->balance_mutex);
                /* we are good with balance_ctl ripped off from under us */
-               BUG_ON(atomic_read(&fs_info->balance_running));
+               BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
                atomic_dec(&fs_info->balance_pause_req);
        } else {
                ret = -ENOTCONN;
@@ -4101,40 +4080,48 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
 
 int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
 {
-       if (sb_rdonly(fs_info->sb))
-               return -EROFS;
-
        mutex_lock(&fs_info->balance_mutex);
        if (!fs_info->balance_ctl) {
                mutex_unlock(&fs_info->balance_mutex);
                return -ENOTCONN;
        }
 
+       /*
+        * A paused balance with the item stored on disk can be resumed at
+        * mount time if the mount is read-write. Otherwise it's still paused
+        * and we must not allow cancelling as it deletes the item.
+        */
+       if (sb_rdonly(fs_info->sb)) {
+               mutex_unlock(&fs_info->balance_mutex);
+               return -EROFS;
+       }
+
        atomic_inc(&fs_info->balance_cancel_req);
        /*
         * if we are running just wait and return, balance item is
         * deleted in btrfs_balance in this case
         */
-       if (atomic_read(&fs_info->balance_running)) {
+       if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
                mutex_unlock(&fs_info->balance_mutex);
                wait_event(fs_info->balance_wait_q,
-                          atomic_read(&fs_info->balance_running) == 0);
+                          !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
                mutex_lock(&fs_info->balance_mutex);
        } else {
-               /* reset_balance_state needs volume_mutex */
                mutex_unlock(&fs_info->balance_mutex);
-               mutex_lock(&fs_info->volume_mutex);
+               /*
+                * The lock was released to let other waiters proceed;
+                * reexamine the status now that it is retaken.
+                */
                mutex_lock(&fs_info->balance_mutex);
 
                if (fs_info->balance_ctl) {
                        reset_balance_state(fs_info);
                        clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
                }
-
-               mutex_unlock(&fs_info->volume_mutex);
        }
 
-       BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
+       BUG_ON(fs_info->balance_ctl ||
+               test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
        atomic_dec(&fs_info->balance_cancel_req);
        mutex_unlock(&fs_info->balance_mutex);
        return 0;
@@ -4429,7 +4416,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        if (!path)
                return -ENOMEM;
 
-       path->reada = READA_FORWARD;
+       path->reada = READA_BACK;
 
        mutex_lock(&fs_info->chunk_mutex);
 
@@ -5990,9 +5977,8 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
        return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
 }
 
-int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
-                    u64 chunk_start, u64 physical, u64 devid,
-                    u64 **logical, int *naddrs, int *stripe_len)
+int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
+                    u64 physical, u64 **logical, int *naddrs, int *stripe_len)
 {
        struct extent_map *em;
        struct map_lookup *map;
@@ -6024,8 +6010,6 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
        BUG_ON(!buf); /* -ENOMEM */
 
        for (i = 0; i < map->num_stripes; i++) {
-               if (devid && map->stripes[i].dev->devid != devid)
-                       continue;
                if (map->stripes[i].physical > physical ||
                    map->stripes[i].physical + length <= physical)
                        continue;