Merge branch 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[sfrench/cifs-2.6.git] / fs / btrfs / volumes.c
index a256842875017b386a2a349f734f5feff7391484..b5036bd69e6a6432bf014b5fb8fb9e0c5baf495e 100644 (file)
@@ -145,6 +145,71 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
                             struct btrfs_bio **bbio_ret,
                             int mirror_num, int need_raid_map);
 
+/*
+ * Device locking
+ * ==============
+ *
+ * There are several mutexes that protect manipulation of devices and low-level
+ * structures like chunks but not block groups, extents or files
+ *
+ * uuid_mutex (global lock)
+ * ------------------------
+ * protects the fs_uuids list that tracks all per-fs fs_devices, resulting from
+ * the SCAN_DEV ioctl registration or from mount either implicitly (the first
+ * device) or requested by the device= mount option
+ *
+ * the mutex can be very coarse and can cover long-running operations
+ *
+ * protects: updates to fs_devices counters like missing devices, rw devices,
+ * seeding, structure cloning, opening/closing devices at mount/umount time
+ *
+ * global::fs_devs - add, remove, updates to the global list
+ *
+ * does not protect: manipulation of the fs_devices::devices list!
+ *
+ * btrfs_device::name - renames (write side), read is RCU
+ *
+ * fs_devices::device_list_mutex (per-fs, with RCU)
+ * ------------------------------------------------
+ * protects updates to fs_devices::devices, i.e. adding and deleting
+ *
+ * simple list traversal with read-only actions can be done with RCU protection
+ *
+ * may be used to exclude some operations from running concurrently without any
+ * modifications to the list (see write_all_supers)
+ *
+ * volume_mutex
+ * ------------
+ * coarse lock owned by a mounted filesystem; used to exclude some operations
+ * that cannot run in parallel and affect the higher-level properties of the
+ * filesystem like: device add/delete/resize/replace, or balance
+ *
+ * balance_mutex
+ * -------------
+ * protects balance structures (status, state) and context accessed from
+ * several places (internally, ioctl)
+ *
+ * chunk_mutex
+ * -----------
+ * protects chunks, adding or removing during allocation, trim or when a new
+ * device is added/removed
+ *
+ * cleaner_mutex
+ * -------------
+ * a big lock that is held by the cleaner thread and prevents running subvolume
+ * cleaning together with relocation or delayed iputs
+ *
+ *
+ * Lock nesting
+ * ============
+ *
+ * uuid_mutex
+ *   volume_mutex
+ *     device_list_mutex
+ *       chunk_mutex
+ *     balance_mutex
+ */
+
 DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
 struct list_head *btrfs_get_fs_uuids(void)
@@ -180,6 +245,13 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
        return fs_devs;
 }
 
+static void free_device(struct btrfs_device *device)
+{ /* releases name and flush_bio, then the device; no list unlinking here */
+       rcu_string_free(device->name);
+       bio_put(device->flush_bio);
+       kfree(device);
+}
+
 static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
 {
        struct btrfs_device *device;
@@ -188,9 +260,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
                device = list_entry(fs_devices->devices.next,
                                    struct btrfs_device, dev_list);
                list_del(&device->dev_list);
-               rcu_string_free(device->name);
-               bio_put(device->flush_bio);
-               kfree(device);
+               free_device(device);
        }
        kfree(fs_devices);
 }
@@ -220,6 +290,11 @@ void btrfs_cleanup_fs_uuids(void)
        }
 }
 
+/*
+ * Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
+ * Returned struct is not linked onto any lists and must be destroyed using
+ * free_device.
+ */
 static struct btrfs_device *__alloc_device(void)
 {
        struct btrfs_device *dev;
@@ -244,7 +319,6 @@ static struct btrfs_device *__alloc_device(void)
 
        spin_lock_init(&dev->io_lock);
 
-       spin_lock_init(&dev->reada_lock);
        atomic_set(&dev->reada_in_flight, 0);
        atomic_set(&dev->dev_stats_ccnt, 0);
        btrfs_device_data_ordered_init(dev);
@@ -530,45 +604,42 @@ static void pending_bios_fn(struct btrfs_work *work)
        run_scheduled_bios(device);
 }
 
-
-static void btrfs_free_stale_device(struct btrfs_device *cur_dev)
+/*
+ *  Search for and remove all stale devices (devices which are not mounted).
+ *  When both inputs are NULL, it will search and release all stale devices.
+ *  path:      Optional. When provided, it will release only the unmounted
+ *             devices matching this path.
+ *  skip_dev:  Optional. Will skip this device when searching for the stale
+ *             devices.
+ */
+static void btrfs_free_stale_devices(const char *path,
+                                    struct btrfs_device *skip_dev)
 {
-       struct btrfs_fs_devices *fs_devs;
-       struct btrfs_device *dev;
-
-       if (!cur_dev->name)
-               return;
+       struct btrfs_fs_devices *fs_devs, *tmp_fs_devs;
+       struct btrfs_device *dev, *tmp_dev;
 
-       list_for_each_entry(fs_devs, &fs_uuids, list) {
-               int del = 1;
+       list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, list) {
 
                if (fs_devs->opened)
                        continue;
-               if (fs_devs->seeding)
-                       continue;
 
-               list_for_each_entry(dev, &fs_devs->devices, dev_list) {
+               list_for_each_entry_safe(dev, tmp_dev,
+                                        &fs_devs->devices, dev_list) {
+                       int not_found = 0;
 
-                       if (dev == cur_dev)
+                       if (skip_dev && skip_dev == dev)
                                continue;
-                       if (!dev->name)
+                       if (path && !dev->name)
                                continue;
 
-                       /*
-                        * Todo: This won't be enough. What if the same device
-                        * comes back (with new uuid and) with its mapper path?
-                        * But for now, this does help as mostly an admin will
-                        * either use mapper or non mapper path throughout.
-                        */
                        rcu_read_lock();
-                       del = strcmp(rcu_str_deref(dev->name),
-                                               rcu_str_deref(cur_dev->name));
+                       if (path)
+                               not_found = strcmp(rcu_str_deref(dev->name),
+                                                  path);
                        rcu_read_unlock();
-                       if (!del)
-                               break;
-               }
+                       if (not_found)
+                               continue;
 
-               if (!del) {
                        /* delete the stale device */
                        if (fs_devs->num_devices == 1) {
                                btrfs_sysfs_remove_fsid(fs_devs);
@@ -577,38 +648,99 @@ static void btrfs_free_stale_device(struct btrfs_device *cur_dev)
                        } else {
                                fs_devs->num_devices--;
                                list_del(&dev->dev_list);
-                               rcu_string_free(dev->name);
-                               bio_put(dev->flush_bio);
-                               kfree(dev);
+                               free_device(dev);
                        }
-                       break;
                }
        }
 }
 
+static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
+                       struct btrfs_device *device, fmode_t flags,
+                       void *holder)
+{ /* 0 on success; -EINVAL or btrfs_get_bdev_and_sb()'s error on failure */
+       struct request_queue *q;
+       struct block_device *bdev;
+       struct buffer_head *bh;
+       struct btrfs_super_block *disk_super;
+       u64 devid;
+       int ret;
+
+       if (device->bdev) /* a block device is already attached */
+               return -EINVAL;
+       if (!device->name) /* no name to look the device up by */
+               return -EINVAL;
+
+       ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
+                                   &bdev, &bh);
+       if (ret)
+               return ret;
+
+       disk_super = (struct btrfs_super_block *)bh->b_data; /* super block data held by bh */
+       devid = btrfs_stack_device_id(&disk_super->dev_item);
+       if (devid != device->devid) /* on-disk devid must match the registered one */
+               goto error_brelse;
+
+       if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE)) /* uuid must match too */
+               goto error_brelse;
+
+       device->generation = btrfs_super_generation(disk_super);
+
+       if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { /* seeding super: not writeable */
+               clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+               fs_devices->seeding = 1;
+       } else { /* writeable only if the block device is not read-only */
+               if (bdev_read_only(bdev))
+                       clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+               else
+                       set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+       }
+
+       q = bdev_get_queue(bdev);
+       if (!blk_queue_nonrot(q)) /* one rotating queue marks the whole fs_devices */
+               fs_devices->rotating = 1;
+
+       device->bdev = bdev;
+       clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+       device->mode = flags;
+
+       fs_devices->open_devices++;
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
+           device->devid != BTRFS_DEV_REPLACE_DEVID) { /* replace devid is not counted as rw */
+               fs_devices->rw_devices++;
+               list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
+       }
+       brelse(bh);
+
+       return 0;
+
+error_brelse: /* devid/uuid mismatch: release the buffer and the block device */
+       brelse(bh);
+       blkdev_put(bdev, flags);
+
+       return -EINVAL;
+}
+
 /*
  * Add new device to list of registered devices
  *
  * Returns:
- * 1   - first time device is seen
- * 0   - device already known
- * < 0 - error
+ * device pointer which was just added or updated when successful
+ * error pointer when failed
  */
-static noinline int device_list_add(const char *path,
-                          struct btrfs_super_block *disk_super,
-                          u64 devid, struct btrfs_fs_devices **fs_devices_ret)
+static noinline struct btrfs_device *device_list_add(const char *path,
+                          struct btrfs_super_block *disk_super)
 {
        struct btrfs_device *device;
        struct btrfs_fs_devices *fs_devices;
        struct rcu_string *name;
-       int ret = 0;
        u64 found_transid = btrfs_super_generation(disk_super);
+       u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
 
        fs_devices = find_fsid(disk_super->fsid);
        if (!fs_devices) {
                fs_devices = alloc_fs_devices(disk_super->fsid);
                if (IS_ERR(fs_devices))
-                       return PTR_ERR(fs_devices);
+                       return ERR_CAST(fs_devices);
 
                list_add(&fs_devices->list, &fs_uuids);
 
@@ -620,20 +752,19 @@ static noinline int device_list_add(const char *path,
 
        if (!device) {
                if (fs_devices->opened)
-                       return -EBUSY;
+                       return ERR_PTR(-EBUSY);
 
                device = btrfs_alloc_device(NULL, &devid,
                                            disk_super->dev_item.uuid);
                if (IS_ERR(device)) {
                        /* we can safely leave the fs_devices entry around */
-                       return PTR_ERR(device);
+                       return device;
                }
 
                name = rcu_string_strdup(path, GFP_NOFS);
                if (!name) {
-                       bio_put(device->flush_bio);
-                       kfree(device);
-                       return -ENOMEM;
+                       free_device(device);
+                       return ERR_PTR(-ENOMEM);
                }
                rcu_assign_pointer(device->name, name);
 
@@ -642,8 +773,16 @@ static noinline int device_list_add(const char *path,
                fs_devices->num_devices++;
                mutex_unlock(&fs_devices->device_list_mutex);
 
-               ret = 1;
                device->fs_devices = fs_devices;
+               btrfs_free_stale_devices(path, device);
+
+               if (disk_super->label[0])
+                       pr_info("BTRFS: device label %s devid %llu transid %llu %s\n",
+                               disk_super->label, devid, found_transid, path);
+               else
+                       pr_info("BTRFS: device fsid %pU devid %llu transid %llu %s\n",
+                               disk_super->fsid, devid, found_transid, path);
+
        } else if (!device->name || strcmp(device->name->str, path)) {
                /*
                 * When FS is already mounted.
@@ -679,17 +818,17 @@ static noinline int device_list_add(const char *path,
                         * with larger generation number or the last-in if
                         * generation are equal.
                         */
-                       return -EEXIST;
+                       return ERR_PTR(-EEXIST);
                }
 
                name = rcu_string_strdup(path, GFP_NOFS);
                if (!name)
-                       return -ENOMEM;
+                       return ERR_PTR(-ENOMEM);
                rcu_string_free(device->name);
                rcu_assign_pointer(device->name, name);
-               if (device->missing) {
+               if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
                        fs_devices->missing_devices--;
-                       device->missing = 0;
+                       clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
                }
        }
 
@@ -702,16 +841,9 @@ static noinline int device_list_add(const char *path,
        if (!fs_devices->opened)
                device->generation = found_transid;
 
-       /*
-        * if there is new btrfs on an already registered device,
-        * then remove the stale device entry.
-        */
-       if (ret > 0)
-               btrfs_free_stale_device(device);
-
-       *fs_devices_ret = fs_devices;
+       fs_devices->total_devices = btrfs_super_num_devices(disk_super);
 
-       return ret;
+       return device;
 }
 
 static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
@@ -744,8 +876,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
                        name = rcu_string_strdup(orig_dev->name->str,
                                        GFP_KERNEL);
                        if (!name) {
-                               bio_put(device->flush_bio);
-                               kfree(device);
+                               free_device(device);
                                goto error;
                        }
                        rcu_assign_pointer(device->name, name);
@@ -772,10 +903,12 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step)
 again:
        /* This is the initialized path, it is safe to release the devices. */
        list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
-               if (device->in_fs_metadata) {
-                       if (!device->is_tgtdev_for_dev_replace &&
-                           (!latest_dev ||
-                            device->generation > latest_dev->generation)) {
+               if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
+                                                       &device->dev_state)) {
+                       if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
+                            &device->dev_state) &&
+                            (!latest_dev ||
+                             device->generation > latest_dev->generation)) {
                                latest_dev = device;
                        }
                        continue;
@@ -792,7 +925,8 @@ again:
                         * not, which means whether this device is
                         * used or whether it should be removed.
                         */
-                       if (step == 0 || device->is_tgtdev_for_dev_replace) {
+                       if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
+                                                 &device->dev_state)) {
                                continue;
                        }
                }
@@ -801,17 +935,16 @@ again:
                        device->bdev = NULL;
                        fs_devices->open_devices--;
                }
-               if (device->writeable) {
+               if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                        list_del_init(&device->dev_alloc_list);
-                       device->writeable = 0;
-                       if (!device->is_tgtdev_for_dev_replace)
+                       clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+                       if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
+                                     &device->dev_state))
                                fs_devices->rw_devices--;
                }
                list_del_init(&device->dev_list);
                fs_devices->num_devices--;
-               rcu_string_free(device->name);
-               bio_put(device->flush_bio);
-               kfree(device);
+               free_device(device);
        }
 
        if (fs_devices->seed) {
@@ -824,35 +957,25 @@ again:
        mutex_unlock(&uuid_mutex);
 }
 
-static void __free_device(struct work_struct *work)
-{
-       struct btrfs_device *device;
-
-       device = container_of(work, struct btrfs_device, rcu_work);
-       rcu_string_free(device->name);
-       bio_put(device->flush_bio);
-       kfree(device);
-}
-
-static void free_device(struct rcu_head *head)
+static void free_device_rcu(struct rcu_head *head)
 {
        struct btrfs_device *device;
 
        device = container_of(head, struct btrfs_device, rcu);
-
-       INIT_WORK(&device->rcu_work, __free_device);
-       schedule_work(&device->rcu_work);
+       free_device(device);
 }
 
 static void btrfs_close_bdev(struct btrfs_device *device)
 {
-       if (device->bdev && device->writeable) {
+       if (!device->bdev)
+               return;
+
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                sync_blockdev(device->bdev);
                invalidate_bdev(device->bdev);
        }
 
-       if (device->bdev)
-               blkdev_put(device->bdev, device->mode);
+       blkdev_put(device->bdev, device->mode);
 }
 
 static void btrfs_prepare_close_one_device(struct btrfs_device *device)
@@ -864,13 +987,13 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device)
        if (device->bdev)
                fs_devices->open_devices--;
 
-       if (device->writeable &&
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
            device->devid != BTRFS_DEV_REPLACE_DEVID) {
                list_del_init(&device->dev_alloc_list);
                fs_devices->rw_devices--;
        }
 
-       if (device->missing)
+       if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
                fs_devices->missing_devices--;
 
        new_device = btrfs_alloc_device(NULL, &device->devid,
@@ -916,7 +1039,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
                                struct btrfs_device, dev_list);
                list_del(&device->dev_list);
                btrfs_close_bdev(device);
-               call_rcu(&device->rcu, free_device);
+               call_rcu(&device->rcu, free_device_rcu);
        }
 
        WARN_ON(fs_devices->open_devices);
@@ -946,93 +1069,32 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
                __btrfs_close_devices(fs_devices);
                free_fs_devices(fs_devices);
        }
-       /*
-        * Wait for rcu kworkers under __btrfs_close_devices
-        * to finish all blkdev_puts so device is really
-        * free when umount is done.
-        */
-       rcu_barrier();
        return ret;
 }
 
 static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                                fmode_t flags, void *holder)
 {
-       struct request_queue *q;
-       struct block_device *bdev;
        struct list_head *head = &fs_devices->devices;
        struct btrfs_device *device;
        struct btrfs_device *latest_dev = NULL;
-       struct buffer_head *bh;
-       struct btrfs_super_block *disk_super;
-       u64 devid;
-       int seeding = 1;
        int ret = 0;
 
        flags |= FMODE_EXCL;
 
        list_for_each_entry(device, head, dev_list) {
-               if (device->bdev)
-                       continue;
-               if (!device->name)
-                       continue;
-
                /* Just open everything we can; ignore failures here */
-               if (btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
-                                           &bdev, &bh))
+               if (btrfs_open_one_device(fs_devices, device, flags, holder))
                        continue;
 
-               disk_super = (struct btrfs_super_block *)bh->b_data;
-               devid = btrfs_stack_device_id(&disk_super->dev_item);
-               if (devid != device->devid)
-                       goto error_brelse;
-
-               if (memcmp(device->uuid, disk_super->dev_item.uuid,
-                          BTRFS_UUID_SIZE))
-                       goto error_brelse;
-
-               device->generation = btrfs_super_generation(disk_super);
                if (!latest_dev ||
                    device->generation > latest_dev->generation)
                        latest_dev = device;
-
-               if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
-                       device->writeable = 0;
-               } else {
-                       device->writeable = !bdev_read_only(bdev);
-                       seeding = 0;
-               }
-
-               q = bdev_get_queue(bdev);
-               if (blk_queue_discard(q))
-                       device->can_discard = 1;
-               if (!blk_queue_nonrot(q))
-                       fs_devices->rotating = 1;
-
-               device->bdev = bdev;
-               device->in_fs_metadata = 0;
-               device->mode = flags;
-
-               fs_devices->open_devices++;
-               if (device->writeable &&
-                   device->devid != BTRFS_DEV_REPLACE_DEVID) {
-                       fs_devices->rw_devices++;
-                       list_add(&device->dev_alloc_list,
-                                &fs_devices->alloc_list);
-               }
-               brelse(bh);
-               continue;
-
-error_brelse:
-               brelse(bh);
-               blkdev_put(bdev, flags);
-               continue;
        }
        if (fs_devices->open_devices == 0) {
                ret = -EINVAL;
                goto out;
        }
-       fs_devices->seeding = seeding;
        fs_devices->opened = 1;
        fs_devices->latest_bdev = latest_dev->bdev;
        fs_devices->total_rw_bytes = 0;
@@ -1116,12 +1178,10 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
                          struct btrfs_fs_devices **fs_devices_ret)
 {
        struct btrfs_super_block *disk_super;
+       struct btrfs_device *device;
        struct block_device *bdev;
        struct page *page;
-       int ret = -EINVAL;
-       u64 devid;
-       u64 transid;
-       u64 total_devices;
+       int ret = 0;
        u64 bytenr;
 
        /*
@@ -1140,26 +1200,16 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
                goto error;
        }
 
-       if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super))
+       if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
+               ret = -EINVAL;
                goto error_bdev_put;
-
-       devid = btrfs_stack_device_id(&disk_super->dev_item);
-       transid = btrfs_super_generation(disk_super);
-       total_devices = btrfs_super_num_devices(disk_super);
-
-       ret = device_list_add(path, disk_super, devid, fs_devices_ret);
-       if (ret > 0) {
-               if (disk_super->label[0]) {
-                       pr_info("BTRFS: device label %s ", disk_super->label);
-               } else {
-                       pr_info("BTRFS: device fsid %pU ", disk_super->fsid);
-               }
-
-               pr_cont("devid %llu transid %llu %s\n", devid, transid, path);
-               ret = 0;
        }
-       if (!ret && fs_devices_ret)
-               (*fs_devices_ret)->total_devices = total_devices;
+
+       device = device_list_add(path, disk_super);
+       if (IS_ERR(device))
+               ret = PTR_ERR(device);
+       else
+               *fs_devices_ret = device->fs_devices;
 
        btrfs_release_disk_super(page);
 
@@ -1185,7 +1235,8 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
 
        *length = 0;
 
-       if (start >= device->total_bytes || device->is_tgtdev_for_dev_replace)
+       if (start >= device->total_bytes ||
+               test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
                return 0;
 
        path = btrfs_alloc_path();
@@ -1363,7 +1414,8 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction,
        max_hole_size = 0;
 
 again:
-       if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
+       if (search_start >= search_end ||
+               test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
                ret = -ENOSPC;
                goto out;
        }
@@ -1570,8 +1622,8 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        struct btrfs_key key;
 
-       WARN_ON(!device->in_fs_metadata);
-       WARN_ON(device->is_tgtdev_for_dev_replace);
+       WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
+       WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@ -1661,7 +1713,7 @@ error:
  * the device information is stored in the chunk root
  * the btrfs_device struct should be fully filled in
  */
-static int btrfs_add_device(struct btrfs_trans_handle *trans,
+static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
                            struct btrfs_fs_info *fs_info,
                            struct btrfs_device *device)
 {
@@ -1817,7 +1869,8 @@ static struct btrfs_device * btrfs_find_next_active_device(
 
        list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
                if (next_device != device &&
-                       !next_device->missing && next_device->bdev)
+                   !test_bit(BTRFS_DEV_STATE_MISSING, &next_device->dev_state)
+                   && next_device->bdev)
                        return next_device;
        }
 
@@ -1858,6 +1911,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
        u64 num_devices;
        int ret = 0;
 
+       mutex_lock(&fs_info->volume_mutex);
        mutex_lock(&uuid_mutex);
 
        num_devices = fs_info->fs_devices->num_devices;
@@ -1877,17 +1931,18 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
        if (ret)
                goto out;
 
-       if (device->is_tgtdev_for_dev_replace) {
+       if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
                ret = BTRFS_ERROR_DEV_TGT_REPLACE;
                goto out;
        }
 
-       if (device->writeable && fs_info->fs_devices->rw_devices == 1) {
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
+           fs_info->fs_devices->rw_devices == 1) {
                ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
                goto out;
        }
 
-       if (device->writeable) {
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                mutex_lock(&fs_info->chunk_mutex);
                list_del_init(&device->dev_alloc_list);
                device->fs_devices->rw_devices--;
@@ -1909,7 +1964,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
        if (ret)
                goto error_undo;
 
-       device->in_fs_metadata = 0;
+       clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
        btrfs_scrub_cancel_dev(fs_info, device);
 
        /*
@@ -1929,7 +1984,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
        device->fs_devices->num_devices--;
        device->fs_devices->total_devices--;
 
-       if (device->missing)
+       if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
                device->fs_devices->missing_devices--;
 
        btrfs_assign_next_active_device(fs_info, device, NULL);
@@ -1949,11 +2004,11 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
         * the devices list.  All that's left is to zero out the old
         * supers and free the device.
         */
-       if (device->writeable)
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
                btrfs_scratch_superblocks(device->bdev, device->name->str);
 
        btrfs_close_bdev(device);
-       call_rcu(&device->rcu, free_device);
+       call_rcu(&device->rcu, free_device_rcu);
 
        if (cur_devices->open_devices == 0) {
                struct btrfs_fs_devices *fs_devices;
@@ -1972,10 +2027,11 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 
 out:
        mutex_unlock(&uuid_mutex);
+       mutex_unlock(&fs_info->volume_mutex);
        return ret;
 
 error_undo:
-       if (device->writeable) {
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                mutex_lock(&fs_info->chunk_mutex);
                list_add(&device->dev_alloc_list,
                         &fs_info->fs_devices->alloc_list);
@@ -2003,10 +2059,10 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
        list_del_rcu(&srcdev->dev_list);
        list_del(&srcdev->dev_alloc_list);
        fs_devices->num_devices--;
-       if (srcdev->missing)
+       if (test_bit(BTRFS_DEV_STATE_MISSING, &srcdev->dev_state))
                fs_devices->missing_devices--;
 
-       if (srcdev->writeable)
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state))
                fs_devices->rw_devices--;
 
        if (srcdev->bdev)
@@ -2018,13 +2074,13 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
 {
        struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
 
-       if (srcdev->writeable) {
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
                /* zero out the old super if it is writable */
                btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
        }
 
        btrfs_close_bdev(srcdev);
-       call_rcu(&srcdev->rcu, free_device);
+       call_rcu(&srcdev->rcu, free_device_rcu);
 
        /* if this is no devs we rather delete the fs_devices */
        if (!fs_devices->num_devices) {
@@ -2083,7 +2139,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
        btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
 
        btrfs_close_bdev(tgtdev);
-       call_rcu(&tgtdev->rcu, free_device);
+       call_rcu(&tgtdev->rcu, free_device_rcu);
 }
 
 static int btrfs_find_device_by_path(struct btrfs_fs_info *fs_info,
@@ -2128,7 +2184,8 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
                 * is held by the caller.
                 */
                list_for_each_entry(tmp, devices, dev_list) {
-                       if (tmp->in_fs_metadata && !tmp->bdev) {
+                       if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
+                                       &tmp->dev_state) && !tmp->bdev) {
                                *device = tmp;
                                break;
                        }
@@ -2357,26 +2414,19 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 
        name = rcu_string_strdup(device_path, GFP_KERNEL);
        if (!name) {
-               bio_put(device->flush_bio);
-               kfree(device);
                ret = -ENOMEM;
-               goto error;
+               goto error_free_device;
        }
        rcu_assign_pointer(device->name, name);
 
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
-               rcu_string_free(device->name);
-               bio_put(device->flush_bio);
-               kfree(device);
                ret = PTR_ERR(trans);
-               goto error;
+               goto error_free_device;
        }
 
        q = bdev_get_queue(bdev);
-       if (blk_queue_discard(q))
-               device->can_discard = 1;
-       device->writeable = 1;
+       set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
        device->generation = trans->transid;
        device->io_width = fs_info->sectorsize;
        device->io_align = fs_info->sectorsize;
@@ -2387,8 +2437,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
        device->commit_total_bytes = device->total_bytes;
        device->fs_info = fs_info;
        device->bdev = bdev;
-       device->in_fs_metadata = 1;
-       device->is_tgtdev_for_dev_replace = 0;
+       set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+       clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
        device->mode = FMODE_EXCL;
        device->dev_stats_valid = 1;
        set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
@@ -2449,7 +2499,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
                }
        }
 
-       ret = btrfs_add_device(trans, fs_info, device);
+       ret = btrfs_add_dev_item(trans, fs_info, device);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                goto error_sysfs;
@@ -2510,9 +2560,8 @@ error_trans:
                sb->s_flags |= SB_RDONLY;
        if (trans)
                btrfs_end_transaction(trans);
-       rcu_string_free(device->name);
-       bio_put(device->flush_bio);
-       kfree(device);
+error_free_device:
+       free_device(device);
 error:
        blkdev_put(bdev, FMODE_EXCL);
        if (seeding_dev && !unlocked) {
@@ -2527,7 +2576,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
                                  struct btrfs_device *srcdev,
                                  struct btrfs_device **device_out)
 {
-       struct request_queue *q;
        struct btrfs_device *device;
        struct block_device *bdev;
        struct list_head *devices;
@@ -2578,18 +2626,14 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 
        name = rcu_string_strdup(device_path, GFP_KERNEL);
        if (!name) {
-               bio_put(device->flush_bio);
-               kfree(device);
+               free_device(device);
                ret = -ENOMEM;
                goto error;
        }
        rcu_assign_pointer(device->name, name);
 
-       q = bdev_get_queue(bdev);
-       if (blk_queue_discard(q))
-               device->can_discard = 1;
        mutex_lock(&fs_info->fs_devices->device_list_mutex);
-       device->writeable = 1;
+       set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
        device->generation = 0;
        device->io_width = fs_info->sectorsize;
        device->io_align = fs_info->sectorsize;
@@ -2602,8 +2646,8 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
        device->commit_bytes_used = device->bytes_used;
        device->fs_info = fs_info;
        device->bdev = bdev;
-       device->in_fs_metadata = 1;
-       device->is_tgtdev_for_dev_replace = 1;
+       set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+       set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
        device->mode = FMODE_EXCL;
        device->dev_stats_valid = 1;
        set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
@@ -2631,7 +2675,7 @@ void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
        tgtdev->io_align = sectorsize;
        tgtdev->sector_size = sectorsize;
        tgtdev->fs_info = fs_info;
-       tgtdev->in_fs_metadata = 1;
+       set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &tgtdev->dev_state);
 }
 
 static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
@@ -2689,7 +2733,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
        u64 old_total;
        u64 diff;
 
-       if (!device->writeable)
+       if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
                return -EACCES;
 
        new_size = round_down(new_size, fs_info->sectorsize);
@@ -2699,7 +2743,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
        diff = round_down(new_size - device->total_bytes, fs_info->sectorsize);
 
        if (new_size <= device->total_bytes ||
-           device->is_tgtdev_for_dev_replace) {
+           test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
                mutex_unlock(&fs_info->chunk_mutex);
                return -EINVAL;
        }
@@ -3043,6 +3087,48 @@ error:
        return ret;
 }
 
+/*
+ * Force-allocate a data chunk if the chunk at @chunk_offset is a data chunk
+ * and data space has no used bytes.  Return 1 if one was allocated, 0 if
+ * nothing was needed, <0 if joining the transaction or the allocation failed.
+ */
+static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
+                                     u64 chunk_offset)
+{
+       struct btrfs_block_group_cache *cache;
+       u64 bytes_used;
+       u64 chunk_type;
+
+       cache = btrfs_lookup_block_group(fs_info, chunk_offset);
+       ASSERT(cache);
+       chunk_type = cache->flags;
+       btrfs_put_block_group(cache);
+
+       if (chunk_type & BTRFS_BLOCK_GROUP_DATA) {
+               spin_lock(&fs_info->data_sinfo->lock);
+               bytes_used = fs_info->data_sinfo->bytes_used;
+               spin_unlock(&fs_info->data_sinfo->lock);
+
+               if (!bytes_used) {
+                       struct btrfs_trans_handle *trans;
+                       int ret;
+
+                       trans = btrfs_join_transaction(fs_info->tree_root);
+                       if (IS_ERR(trans))
+                               return PTR_ERR(trans);
+
+                       ret = btrfs_force_chunk_alloc(trans, fs_info,
+                                                     BTRFS_BLOCK_GROUP_DATA);
+                       btrfs_end_transaction(trans);
+                       if (ret < 0)
+                               return ret;
+
+                       return 1;
+               }
+       }
+       return 0;
+}
+
 static int insert_balance_item(struct btrfs_fs_info *fs_info,
                               struct btrfs_balance_control *bctl)
 {
@@ -3501,7 +3587,6 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
        u32 count_meta = 0;
        u32 count_sys = 0;
        int chunk_reserved = 0;
-       u64 bytes_used = 0;
 
        /* step one make some room on all the devices */
        devices = &fs_info->fs_devices->devices;
@@ -3509,10 +3594,10 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
                old_size = btrfs_device_get_total_bytes(device);
                size_to_free = div_factor(old_size, 1);
                size_to_free = min_t(u64, size_to_free, SZ_1M);
-               if (!device->writeable ||
+               if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) ||
                    btrfs_device_get_total_bytes(device) -
                    btrfs_device_get_bytes_used(device) > size_to_free ||
-                   device->is_tgtdev_for_dev_replace)
+                   test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
                        continue;
 
                ret = btrfs_shrink_device(device, old_size - size_to_free);
@@ -3660,28 +3745,21 @@ again:
                        goto loop;
                }
 
-               ASSERT(fs_info->data_sinfo);
-               spin_lock(&fs_info->data_sinfo->lock);
-               bytes_used = fs_info->data_sinfo->bytes_used;
-               spin_unlock(&fs_info->data_sinfo->lock);
-
-               if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
-                   !chunk_reserved && !bytes_used) {
-                       trans = btrfs_start_transaction(chunk_root, 0);
-                       if (IS_ERR(trans)) {
-                               mutex_unlock(&fs_info->delete_unused_bgs_mutex);
-                               ret = PTR_ERR(trans);
-                               goto error;
-                       }
-
-                       ret = btrfs_force_chunk_alloc(trans, fs_info,
-                                                     BTRFS_BLOCK_GROUP_DATA);
-                       btrfs_end_transaction(trans);
+               if (!chunk_reserved) {
+                       /*
+                        * We may be relocating the only data chunk we have,
+                        * which could potentially end up losing the data
+                        * RAID profile, so let's allocate an empty one in
+                        * advance.
+                        */
+                       ret = btrfs_may_alloc_data_chunk(fs_info,
+                                                        found_key.offset);
                        if (ret < 0) {
                                mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                                goto error;
+                       } else if (ret == 1) {
+                               chunk_reserved = 1;
                        }
-                       chunk_reserved = 1;
                }
 
                ret = btrfs_relocate_chunk(fs_info, found_key.offset);
@@ -4380,7 +4458,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        new_size = round_down(new_size, fs_info->sectorsize);
        diff = round_down(old_size - new_size, fs_info->sectorsize);
 
-       if (device->is_tgtdev_for_dev_replace)
+       if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
                return -EINVAL;
 
        path = btrfs_alloc_path();
@@ -4392,7 +4470,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        mutex_lock(&fs_info->chunk_mutex);
 
        btrfs_device_set_total_bytes(device, new_size);
-       if (device->writeable) {
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                device->fs_devices->total_rw_bytes -= diff;
                atomic64_sub(diff, &fs_info->free_chunk_space);
        }
@@ -4444,6 +4522,18 @@ again:
                chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
                btrfs_release_path(path);
 
+               /*
+                * We may be relocating the only data chunk we have,
+                * which could potentially end up losing the data
+                * RAID profile, so let's allocate an empty one in
+                * advance.
+                */
+               ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
+               if (ret < 0) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+                       goto done;
+               }
+
                ret = btrfs_relocate_chunk(fs_info, chunk_offset);
                mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                if (ret && ret != -ENOSPC)
@@ -4517,7 +4607,7 @@ done:
        if (ret) {
                mutex_lock(&fs_info->chunk_mutex);
                btrfs_device_set_total_bytes(device, old_size);
-               if (device->writeable)
+               if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
                        device->fs_devices->total_rw_bytes += diff;
                atomic64_add(diff, &fs_info->free_chunk_space);
                mutex_unlock(&fs_info->chunk_mutex);
@@ -4677,14 +4767,15 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                u64 max_avail;
                u64 dev_offset;
 
-               if (!device->writeable) {
+               if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                        WARN(1, KERN_ERR
                               "BTRFS: read-only device in alloc_list\n");
                        continue;
                }
 
-               if (!device->in_fs_metadata ||
-                   device->is_tgtdev_for_dev_replace)
+               if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
+                                       &device->dev_state) ||
+                   test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
                        continue;
 
                if (device->total_bytes > device->bytes_used)
@@ -5032,12 +5123,13 @@ int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 
        map = em->map_lookup;
        for (i = 0; i < map->num_stripes; i++) {
-               if (map->stripes[i].dev->missing) {
+               if (test_bit(BTRFS_DEV_STATE_MISSING,
+                                       &map->stripes[i].dev->dev_state)) {
                        miss_ndevs++;
                        continue;
                }
-
-               if (!map->stripes[i].dev->writeable) {
+               if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
+                                       &map->stripes[i].dev->dev_state)) {
                        readonly = 1;
                        goto end;
                }
@@ -5103,7 +5195,14 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
        else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
                ret = 2;
        else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
-               ret = 3;
+               /*
+                * There could be two corrupted data stripes, so we need to
+                * retry in a loop in order to rebuild the correct data.
+                * 
+                * Fail a stripe at a time on every retry except the
+                * stripe under reconstruction.
+                */
+               ret = map->num_stripes;
        else
                ret = 1;
        free_extent_map(em);
@@ -6003,15 +6102,14 @@ static void btrfs_end_bio(struct bio *bio)
                        dev = bbio->stripes[stripe_index].dev;
                        if (dev->bdev) {
                                if (bio_op(bio) == REQ_OP_WRITE)
-                                       btrfs_dev_stat_inc(dev,
+                                       btrfs_dev_stat_inc_and_print(dev,
                                                BTRFS_DEV_STAT_WRITE_ERRS);
                                else
-                                       btrfs_dev_stat_inc(dev,
+                                       btrfs_dev_stat_inc_and_print(dev,
                                                BTRFS_DEV_STAT_READ_ERRS);
                                if (bio->bi_opf & REQ_PREFLUSH)
-                                       btrfs_dev_stat_inc(dev,
+                                       btrfs_dev_stat_inc_and_print(dev,
                                                BTRFS_DEV_STAT_FLUSH_ERRS);
-                               btrfs_dev_stat_print_on_error(dev);
                        }
                }
        }
@@ -6061,16 +6159,15 @@ static noinline void btrfs_schedule_bio(struct btrfs_device *device,
        int should_queue = 1;
        struct btrfs_pending_bios *pending_bios;
 
-       if (device->missing || !device->bdev) {
+       if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) ||
+           !device->bdev) {
                bio_io_error(bio);
                return;
        }
 
        /* don't bother with additional async steps for reads, right now */
        if (bio_op(bio) == REQ_OP_READ) {
-               bio_get(bio);
                btrfsic_submit_bio(bio);
-               bio_put(bio);
                return;
        }
 
@@ -6207,7 +6304,8 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
        for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
                dev = bbio->stripes[dev_nr].dev;
                if (!dev || !dev->bdev ||
-                   (bio_op(first_bio) == REQ_OP_WRITE && !dev->writeable)) {
+                   (bio_op(first_bio) == REQ_OP_WRITE &&
+                   !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
                        bbio_error(bbio, first_bio, logical);
                        continue;
                }
@@ -6256,7 +6354,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
        device->fs_devices = fs_devices;
        fs_devices->num_devices++;
 
-       device->missing = 1;
+       set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
        fs_devices->missing_devices++;
 
        return device;
@@ -6272,8 +6370,8 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
  *             is generated.
  *
  * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
- * on error.  Returned struct is not linked onto any lists and can be
- * destroyed with kfree() right away.
+ * on error.  Returned struct is not linked onto any lists and must be
+ * destroyed with free_device.
  */
 struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
                                        const u64 *devid,
@@ -6296,8 +6394,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
 
                ret = find_next_devid(fs_info, &tmp);
                if (ret) {
-                       bio_put(dev->flush_bio);
-                       kfree(dev);
+                       free_device(dev);
                        return ERR_PTR(ret);
                }
        }
@@ -6476,7 +6573,9 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
                        }
                        btrfs_report_missing_device(fs_info, devid, uuid, false);
                }
-               map->stripes[i].dev->in_fs_metadata = 1;
+               set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
+                               &(map->stripes[i].dev->dev_state));
+
        }
 
        write_lock(&map_tree->map_tree.lock);
@@ -6505,7 +6604,7 @@ static void fill_device_from_item(struct extent_buffer *leaf,
        device->io_width = btrfs_device_io_width(leaf, dev_item);
        device->sector_size = btrfs_device_sector_size(leaf, dev_item);
        WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
-       device->is_tgtdev_for_dev_replace = 0;
+       clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
 
        ptr = btrfs_device_uuid(dev_item);
        read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
@@ -6617,7 +6716,8 @@ static int read_one_dev(struct btrfs_fs_info *fs_info,
                                                        dev_uuid, false);
                }
 
-               if(!device->bdev && !device->missing) {
+               if (!device->bdev &&
+                   !test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
                        /*
                         * this happens when a device that was properly setup
                         * in the device info lists suddenly goes bad.
@@ -6625,12 +6725,13 @@ static int read_one_dev(struct btrfs_fs_info *fs_info,
                         * device->missing to one here
                         */
                        device->fs_devices->missing_devices++;
-                       device->missing = 1;
+                       set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
                }
 
                /* Move the device to its own fs_devices */
                if (device->fs_devices != fs_devices) {
-                       ASSERT(device->missing);
+                       ASSERT(test_bit(BTRFS_DEV_STATE_MISSING,
+                                                       &device->dev_state));
 
                        list_move(&device->dev_list, &fs_devices->devices);
                        device->fs_devices->num_devices--;
@@ -6644,15 +6745,16 @@ static int read_one_dev(struct btrfs_fs_info *fs_info,
        }
 
        if (device->fs_devices != fs_info->fs_devices) {
-               BUG_ON(device->writeable);
+               BUG_ON(test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state));
                if (device->generation !=
                    btrfs_device_generation(leaf, dev_item))
                        return -EINVAL;
        }
 
        fill_device_from_item(leaf, dev_item, device);
-       device->in_fs_metadata = 1;
-       if (device->writeable && !device->is_tgtdev_for_dev_replace) {
+       set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
+          !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
                device->fs_devices->total_rw_bytes += device->total_bytes;
                atomic64_add(device->total_bytes - device->bytes_used,
                                &fs_info->free_chunk_space);
@@ -6784,10 +6886,13 @@ out_short_read:
 /*
  * Check if all chunks in the fs are OK for read-write degraded mount
  *
+ * If the @failing_dev is specified, it's accounted as missing.
+ *
  * Return true if all chunks meet the minimal RW mount requirements.
  * Return false if any chunk doesn't meet the minimal RW mount requirements.
  */
-bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info)
+bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
+                                       struct btrfs_device *failing_dev)
 {
        struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
        struct extent_map *em;
@@ -6815,12 +6920,16 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info)
                for (i = 0; i < map->num_stripes; i++) {
                        struct btrfs_device *dev = map->stripes[i].dev;
 
-                       if (!dev || !dev->bdev || dev->missing ||
+                       if (!dev || !dev->bdev ||
+                           test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
                            dev->last_flush_error)
                                missing++;
+                       else if (failing_dev && failing_dev == dev)
+                               missing++;
                }
                if (missing > max_tolerated) {
-                       btrfs_warn(fs_info,
+                       if (!failing_dev)
+                               btrfs_warn(fs_info,
        "chunk %llu missing %d devices, max tolerance is %d for writeable mount",
                                   em->start, missing, max_tolerated);
                        free_extent_map(em);
@@ -7091,10 +7200,24 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
 
        mutex_lock(&fs_devices->device_list_mutex);
        list_for_each_entry(device, &fs_devices->devices, dev_list) {
-               if (!device->dev_stats_valid || !btrfs_dev_stats_dirty(device))
+               stats_cnt = atomic_read(&device->dev_stats_ccnt);
+               if (!device->dev_stats_valid || stats_cnt == 0)
                        continue;
 
-               stats_cnt = atomic_read(&device->dev_stats_ccnt);
+
+               /*
+                * There is a LOAD-LOAD control dependency between the value of
+                * dev_stats_ccnt and updating the on-disk values which requires
+                * reading the in-memory counters. Such control dependencies
+                * require explicit read memory barriers.
+                *
+                * This memory barrier pairs with smp_mb__before_atomic in
+                * btrfs_dev_stat_inc/btrfs_dev_stat_set and with the full
+                * barrier implied by atomic_xchg in
+                * btrfs_dev_stats_read_and_reset
+                */
+               smp_rmb();
+
                ret = update_dev_stat_item(trans, fs_info, device);
                if (!ret)
                        atomic_sub(stats_cnt, &device->dev_stats_ccnt);