btrfs: harden against duplicate fsid on scanned devices
[sfrench/cifs-2.6.git] / fs / btrfs / volumes.c
index 8c5ae9943d694ceeff3994a44eb169e348c855e0..fc9a3d8f62384a46fbdbdc77d937af6ba10a3355 100644 (file)
@@ -37,6 +37,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 1,
                .devs_increment = 2,
                .ncopies        = 2,
+               .nparity        = 0,
                .raid_name      = "raid10",
                .bg_flag        = BTRFS_BLOCK_GROUP_RAID10,
                .mindev_error   = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
@@ -49,6 +50,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 1,
                .devs_increment = 2,
                .ncopies        = 2,
+               .nparity        = 0,
                .raid_name      = "raid1",
                .bg_flag        = BTRFS_BLOCK_GROUP_RAID1,
                .mindev_error   = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
@@ -61,6 +63,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 0,
                .devs_increment = 1,
                .ncopies        = 2,
+               .nparity        = 0,
                .raid_name      = "dup",
                .bg_flag        = BTRFS_BLOCK_GROUP_DUP,
                .mindev_error   = 0,
@@ -73,6 +76,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 0,
                .devs_increment = 1,
                .ncopies        = 1,
+               .nparity        = 0,
                .raid_name      = "raid0",
                .bg_flag        = BTRFS_BLOCK_GROUP_RAID0,
                .mindev_error   = 0,
@@ -85,6 +89,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 0,
                .devs_increment = 1,
                .ncopies        = 1,
+               .nparity        = 0,
                .raid_name      = "single",
                .bg_flag        = 0,
                .mindev_error   = 0,
@@ -96,7 +101,8 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .devs_min       = 2,
                .tolerated_failures = 1,
                .devs_increment = 1,
-               .ncopies        = 2,
+               .ncopies        = 1,
+               .nparity        = 1,
                .raid_name      = "raid5",
                .bg_flag        = BTRFS_BLOCK_GROUP_RAID5,
                .mindev_error   = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
@@ -108,7 +114,8 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .devs_min       = 3,
                .tolerated_failures = 2,
                .devs_increment = 1,
-               .ncopies        = 3,
+               .ncopies        = 1,
+               .nparity        = 2,
                .raid_name      = "raid6",
                .bg_flag        = BTRFS_BLOCK_GROUP_RAID6,
                .mindev_error   = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
@@ -850,6 +857,35 @@ static noinline struct btrfs_device *device_list_add(const char *path,
                        return ERR_PTR(-EEXIST);
                }
 
+               /*
+                * We are going to replace the device path for a given devid,
+                * make sure it's the same device if the device is mounted
+                */
+               if (device->bdev) {
+                       struct block_device *path_bdev;
+
+                       path_bdev = lookup_bdev(path);
+                       if (IS_ERR(path_bdev)) {
+                               mutex_unlock(&fs_devices->device_list_mutex);
+                               return ERR_CAST(path_bdev);
+                       }
+
+                       if (device->bdev != path_bdev) {
+                               bdput(path_bdev);
+                               mutex_unlock(&fs_devices->device_list_mutex);
+                               btrfs_warn_in_rcu(device->fs_info,
+                       "duplicate device fsid:devid for %pU:%llu old:%s new:%s",
+                                       disk_super->fsid, devid,
+                                       rcu_str_deref(device->name), path);
+                               return ERR_PTR(-EEXIST);
+                       }
+                       bdput(path_bdev);
+                       btrfs_info_in_rcu(device->fs_info,
+                               "device fsid %pU devid %llu moved old:%s new:%s",
+                               disk_super->fsid, devid,
+                               rcu_str_deref(device->name), path);
+               }
+
                name = rcu_string_strdup(path, GFP_NOFS);
                if (!name) {
                        mutex_unlock(&fs_devices->device_list_mutex);
@@ -4628,6 +4664,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        int devs_min;           /* min devs needed */
        int devs_increment;     /* ndevs has to be a multiple of this */
        int ncopies;            /* how many copies to data has */
+       int nparity;            /* number of stripes worth of bytes to
+                                  store parity information */
        int ret;
        u64 max_stripe_size;
        u64 max_chunk_size;
@@ -4654,6 +4692,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        devs_min = btrfs_raid_array[index].devs_min;
        devs_increment = btrfs_raid_array[index].devs_increment;
        ncopies = btrfs_raid_array[index].ncopies;
+       nparity = btrfs_raid_array[index].nparity;
 
        if (type & BTRFS_BLOCK_GROUP_DATA) {
                max_stripe_size = SZ_1G;
@@ -4783,30 +4822,22 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
         * this will have to be fixed for RAID1 and RAID10 over
         * more drives
         */
-       data_stripes = num_stripes / ncopies;
-
-       if (type & BTRFS_BLOCK_GROUP_RAID5)
-               data_stripes = num_stripes - 1;
-
-       if (type & BTRFS_BLOCK_GROUP_RAID6)
-               data_stripes = num_stripes - 2;
+       data_stripes = (num_stripes - nparity) / ncopies;
 
        /*
         * Use the number of data stripes to figure out how big this chunk
         * is really going to be in terms of logical address space,
-        * and compare that answer with the max chunk size
+        * and compare that answer with the max chunk size. If it's higher,
+        * we try to reduce stripe_size.
         */
        if (stripe_size * data_stripes > max_chunk_size) {
-               stripe_size = div_u64(max_chunk_size, data_stripes);
-
-               /* bump the answer up to a 16MB boundary */
-               stripe_size = round_up(stripe_size, SZ_16M);
-
                /*
-                * But don't go higher than the limits we found while searching
-                * for free extents
+                * Reduce stripe_size, round it up to a 16MB boundary again and
+                * then use it, unless it ends up being even bigger than the
+                * previous value we had already.
                 */
-               stripe_size = min(devices_info[ndevs - 1].max_avail,
+               stripe_size = min(round_up(div_u64(max_chunk_size,
+                                                  data_stripes), SZ_16M),
                                  stripe_size);
        }