Merge tag 'for-5.10-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 30 Oct 2020 20:29:49 +0000 (13:29 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 30 Oct 2020 20:29:49 +0000 (13:29 -0700)
Pull btrfs fixes from David Sterba:

 - lockdep fixes:
     - drop path locks before manipulating sysfs objects or qgroups
     - preliminary fixes before tree locks get switched to rwsem
     - use annotated seqlock

 - build warning fixes (printk format)

 - fix relocation vs fallocate race

 - tree checker properly validates number of stripes and parity

 - readahead vs device replace fixes

 - iomap dio fix for unnecessary buffered io fallback

* tag 'for-5.10-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: convert data_seqcount to seqcount_mutex_t
  btrfs: don't fallback to buffered read if we don't need to
  btrfs: add a helper to read the tree_root commit root for backref lookup
  btrfs: drop the path before adding qgroup items when enabling qgroups
  btrfs: fix readahead hang and use-after-free after removing a device
  btrfs: fix use-after-free on readahead extent after failure to create it
  btrfs: tree-checker: validate number of chunk stripes and parity
  btrfs: tree-checker: fix incorrect printk format
  btrfs: drop the path before adding block group sysfs files
  btrfs: fix relocation failure due to race with fallocate

14 files changed:
fs/btrfs/backref.c
fs/btrfs/block-group.c
fs/btrfs/ctree.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/qgroup.c
fs/btrfs/reada.c
fs/btrfs/tree-checker.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

index b3268f4ea5f34c187108a628890231d3eec43396..771a036867dc05e5dbba4dd9f06d6536c9279c0c 100644 (file)
@@ -544,7 +544,18 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
        int level = ref->level;
        struct btrfs_key search_key = ref->key_for_search;
 
-       root = btrfs_get_fs_root(fs_info, ref->root_id, false);
+       /*
+        * If search_commit_root is set we could possibly be holding locks on
+        * other tree nodes.  This happens when qgroups does backref walks when
+        * adding new delayed refs.  To deal with this we need to look in cache
+        * for the root, and if we don't find it then we need to search the
+        * tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
+        * here.
+        */
+       if (path->search_commit_root)
+               root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id);
+       else
+               root = btrfs_get_fs_root(fs_info, ref->root_id, false);
        if (IS_ERR(root)) {
                ret = PTR_ERR(root);
                goto out_free;
index c0f1d6818df769604a3d4652d6bd684f49195d0f..3ba6f3839d3929e5ab6d7d60ce76ccd8b44a5387 100644 (file)
@@ -2024,6 +2024,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
                key.offset = 0;
                btrfs_release_path(path);
        }
+       btrfs_release_path(path);
 
        list_for_each_entry(space_info, &info->space_info, list) {
                int i;
index aac3d6f4e35b170933d43a5befa0be88f89a2966..0378933d163c6d27dca020d960dc0a782fd25fba 100644 (file)
@@ -3564,6 +3564,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
 int btrfs_reada_wait(void *handle);
 void btrfs_reada_detach(void *handle);
 int btree_readahead_hook(struct extent_buffer *eb, int err);
+void btrfs_reada_remove_dev(struct btrfs_device *dev);
+void btrfs_reada_undo_remove_dev(struct btrfs_device *dev);
 
 static inline int is_fstree(u64 rootid)
 {
index 4a0243cb9d9730401f967c46ede2ec3a62a58556..5b9e3f3ace22cf7ef89cf532dfd9f00c1ea9a81d 100644 (file)
@@ -688,6 +688,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
        }
        btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
 
+       if (!scrub_ret)
+               btrfs_reada_remove_dev(src_device);
+
        /*
         * We have to use this loop approach because at this point src_device
         * has to be available for transaction commit to complete, yet new
@@ -696,6 +699,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
        while (1) {
                trans = btrfs_start_transaction(root, 0);
                if (IS_ERR(trans)) {
+                       btrfs_reada_undo_remove_dev(src_device);
                        mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
                        return PTR_ERR(trans);
                }
@@ -746,6 +750,7 @@ error:
                up_write(&dev_replace->rwsem);
                mutex_unlock(&fs_info->chunk_mutex);
                mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+               btrfs_reada_undo_remove_dev(src_device);
                btrfs_rm_dev_replace_blocked(fs_info);
                if (tgt_device)
                        btrfs_destroy_dev_replace_tgtdev(tgt_device);
index 8e3438672a82d2f53c78922fa43572e1fc130eb8..af97ddcc6b3e8710cf9a55a4064192b3b1027b81 100644 (file)
@@ -1281,32 +1281,26 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
-                                       struct btrfs_key *key)
+static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
+                                             struct btrfs_path *path,
+                                             struct btrfs_key *key)
 {
        struct btrfs_root *root;
        struct btrfs_fs_info *fs_info = tree_root->fs_info;
-       struct btrfs_path *path;
        u64 generation;
        int ret;
        int level;
 
-       path = btrfs_alloc_path();
-       if (!path)
-               return ERR_PTR(-ENOMEM);
-
        root = btrfs_alloc_root(fs_info, key->objectid, GFP_NOFS);
-       if (!root) {
-               ret = -ENOMEM;
-               goto alloc_fail;
-       }
+       if (!root)
+               return ERR_PTR(-ENOMEM);
 
        ret = btrfs_find_root(tree_root, key, path,
                              &root->root_item, &root->root_key);
        if (ret) {
                if (ret > 0)
                        ret = -ENOENT;
-               goto find_fail;
+               goto fail;
        }
 
        generation = btrfs_root_generation(&root->root_item);
@@ -1317,21 +1311,31 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
        if (IS_ERR(root->node)) {
                ret = PTR_ERR(root->node);
                root->node = NULL;
-               goto find_fail;
+               goto fail;
        } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
                ret = -EIO;
-               goto find_fail;
+               goto fail;
        }
        root->commit_root = btrfs_root_node(root);
-out:
-       btrfs_free_path(path);
        return root;
-
-find_fail:
+fail:
        btrfs_put_root(root);
-alloc_fail:
-       root = ERR_PTR(ret);
-       goto out;
+       return ERR_PTR(ret);
+}
+
+struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
+                                       struct btrfs_key *key)
+{
+       struct btrfs_root *root;
+       struct btrfs_path *path;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return ERR_PTR(-ENOMEM);
+       root = read_tree_root_path(tree_root, path, key);
+       btrfs_free_path(path);
+
+       return root;
 }
 
 /*
@@ -1419,6 +1423,31 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
        return root;
 }
 
+static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
+                                               u64 objectid)
+{
+       if (objectid == BTRFS_ROOT_TREE_OBJECTID)
+               return btrfs_grab_root(fs_info->tree_root);
+       if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+               return btrfs_grab_root(fs_info->extent_root);
+       if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
+               return btrfs_grab_root(fs_info->chunk_root);
+       if (objectid == BTRFS_DEV_TREE_OBJECTID)
+               return btrfs_grab_root(fs_info->dev_root);
+       if (objectid == BTRFS_CSUM_TREE_OBJECTID)
+               return btrfs_grab_root(fs_info->csum_root);
+       if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
+               return btrfs_grab_root(fs_info->quota_root) ?
+                       fs_info->quota_root : ERR_PTR(-ENOENT);
+       if (objectid == BTRFS_UUID_TREE_OBJECTID)
+               return btrfs_grab_root(fs_info->uuid_root) ?
+                       fs_info->uuid_root : ERR_PTR(-ENOENT);
+       if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
+               return btrfs_grab_root(fs_info->free_space_root) ?
+                       fs_info->free_space_root : ERR_PTR(-ENOENT);
+       return NULL;
+}
+
 int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
                         struct btrfs_root *root)
 {
@@ -1518,25 +1547,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
        struct btrfs_key key;
        int ret;
 
-       if (objectid == BTRFS_ROOT_TREE_OBJECTID)
-               return btrfs_grab_root(fs_info->tree_root);
-       if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
-               return btrfs_grab_root(fs_info->extent_root);
-       if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
-               return btrfs_grab_root(fs_info->chunk_root);
-       if (objectid == BTRFS_DEV_TREE_OBJECTID)
-               return btrfs_grab_root(fs_info->dev_root);
-       if (objectid == BTRFS_CSUM_TREE_OBJECTID)
-               return btrfs_grab_root(fs_info->csum_root);
-       if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
-               return btrfs_grab_root(fs_info->quota_root) ?
-                       fs_info->quota_root : ERR_PTR(-ENOENT);
-       if (objectid == BTRFS_UUID_TREE_OBJECTID)
-               return btrfs_grab_root(fs_info->uuid_root) ?
-                       fs_info->uuid_root : ERR_PTR(-ENOENT);
-       if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
-               return btrfs_grab_root(fs_info->free_space_root) ?
-                       fs_info->free_space_root : ERR_PTR(-ENOENT);
+       root = btrfs_get_global_root(fs_info, objectid);
+       if (root)
+               return root;
 again:
        root = btrfs_lookup_fs_root(fs_info, objectid);
        if (root) {
@@ -1621,6 +1634,52 @@ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
        return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
 }
 
+/*
+ * btrfs_get_fs_root_commit_root - return a root for the given objectid
+ * @fs_info:   the fs_info
+ * @objectid:  the objectid we need to lookup
+ *
+ * This is exclusively used for backref walking, and exists specifically because
+ * of how qgroups does lookups.  Qgroups will do a backref lookup at delayed ref
+ * creation time, which means we may have to read the tree_root in order to look
+ * up a fs root that is not in memory.  If the root is not in memory we will
+ * read the tree root commit root and look up the fs root from there.  This is a
+ * temporary root, it will not be inserted into the radix tree as it doesn't
+ * have the most uptodate information, it'll simply be discarded once the
+ * backref code is finished using the root.
+ */
+struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
+                                                struct btrfs_path *path,
+                                                u64 objectid)
+{
+       struct btrfs_root *root;
+       struct btrfs_key key;
+
+       ASSERT(path->search_commit_root && path->skip_locking);
+
+       /*
+        * This can return -ENOENT if we ask for a root that doesn't exist, but
+        * since this is called via the backref walking code we won't be looking
+        * up a root that doesn't exist, unless there's corruption.  So if root
+        * != NULL just return it.
+        */
+       root = btrfs_get_global_root(fs_info, objectid);
+       if (root)
+               return root;
+
+       root = btrfs_lookup_fs_root(fs_info, objectid);
+       if (root)
+               return root;
+
+       key.objectid = objectid;
+       key.type = BTRFS_ROOT_ITEM_KEY;
+       key.offset = (u64)-1;
+       root = read_tree_root_path(fs_info->tree_root, path, &key);
+       btrfs_release_path(path);
+
+       return root;
+}
+
 /*
  * called by the kthread helper functions to finally call the bio end_io
  * functions.  This is where read checksum verification actually happens
index fee69ced58b4b921df680dd9b39c87bdceb9df57..182540bdcea0fd3002435b034d6bd84c839949bb 100644 (file)
@@ -69,6 +69,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
                                     u64 objectid, bool check_ref);
 struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
                                         u64 objectid, dev_t anon_dev);
+struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
+                                                struct btrfs_path *path,
+                                                u64 objectid);
 
 void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
 int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
index 3b21fee13e77811e1d6cba72478ef8f805ef28a8..5fd60b13f4f83287df44432acfb305228c674495 100644 (file)
@@ -3185,7 +3185,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                struct btrfs_tree_block_info *bi;
                if (item_size < sizeof(*ei) + sizeof(*bi)) {
                        btrfs_crit(info,
-"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %lu",
+"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu",
                                   key.objectid, key.type, key.offset,
                                   owner_objectid, item_size,
                                   sizeof(*ei) + sizeof(*bi));
index 0ff659455b1ebc8588ef781cab448294579b14ba..87355a38a654702e154e3c9490d0c3da590743d6 100644 (file)
@@ -3628,7 +3628,8 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
                inode_lock_shared(inode);
                ret = btrfs_direct_IO(iocb, to);
                inode_unlock_shared(inode);
-               if (ret < 0)
+               if (ret < 0 || !iov_iter_count(to) ||
+                   iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp)))
                        return ret;
        }
 
index 936c3137c6467d30e8bca4fdb6398e51b11c52ae..da58c58ef9aa1cbb598306ca81af3583979b5f1b 100644 (file)
@@ -9672,10 +9672,16 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                 * clear_offset by our extent size.
                 */
                clear_offset += ins.offset;
-               btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 
                last_alloc = ins.offset;
                trans = insert_prealloc_file_extent(trans, inode, &ins, cur_offset);
+               /*
+                * Now that we inserted the prealloc extent we can finally
+                * decrement the number of reservations in the block group.
+                * If we did it before, we could race with relocation and have
+                * relocation miss the reserved extent, making it fail later.
+                */
+               btrfs_dec_block_group_reservations(fs_info, ins.objectid);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
                        btrfs_free_reserved_extent(fs_info, ins.objectid,
index 580899bdb9915bad902322d530808532f7e8a975..c54ea658663279aba968fa8a9365efcbe3b94ee1 100644 (file)
@@ -1026,6 +1026,10 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
                if (found_key.type == BTRFS_ROOT_REF_KEY) {
+
+                       /* Release locks on tree_root before we access quota_root */
+                       btrfs_release_path(path);
+
                        ret = add_qgroup_item(trans, quota_root,
                                              found_key.offset);
                        if (ret) {
@@ -1044,6 +1048,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
                                btrfs_abort_transaction(trans, ret);
                                goto out_free_path;
                        }
+                       ret = btrfs_search_slot_for_read(tree_root, &found_key,
+                                                        path, 1, 0);
+                       if (ret < 0) {
+                               btrfs_abort_transaction(trans, ret);
+                               goto out_free_path;
+                       }
+                       if (ret > 0) {
+                               /*
+                                * Shouldn't happen, but in case it does we
+                                * don't need to do the btrfs_next_item, just
+                                * continue.
+                                */
+                               continue;
+                       }
                }
                ret = btrfs_next_item(tree_root, path);
                if (ret < 0) {
index 9d4f5316a7e8be1464d9349b6ef9a9fb20fccfcc..d9a166eb344e6484a6e86b11a4c5e7ca09dc35fa 100644 (file)
@@ -421,6 +421,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
                if (!dev->bdev)
                        continue;
 
+               if (test_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state))
+                       continue;
+
                if (dev_replace_is_ongoing &&
                    dev == fs_info->dev_replace.tgtdev) {
                        /*
@@ -445,6 +448,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
                }
                have_zone = 1;
        }
+       if (!have_zone)
+               radix_tree_delete(&fs_info->reada_tree, index);
        spin_unlock(&fs_info->reada_lock);
        up_read(&fs_info->dev_replace.rwsem);
 
@@ -1020,3 +1025,45 @@ void btrfs_reada_detach(void *handle)
 
        kref_put(&rc->refcnt, reada_control_release);
 }
+
+/*
+ * Before removing a device (device replace or device remove ioctls), call this
+ * function to wait for all existing readahead requests on the device and to
+ * make sure no one queues more readahead requests for the device.
+ *
+ * Must be called without holding either the device list mutex or the device
+ * replace semaphore, otherwise it will deadlock.
+ */
+void btrfs_reada_remove_dev(struct btrfs_device *dev)
+{
+       struct btrfs_fs_info *fs_info = dev->fs_info;
+
+       /* Serialize with readahead extent creation at reada_find_extent(). */
+       spin_lock(&fs_info->reada_lock);
+       set_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
+       spin_unlock(&fs_info->reada_lock);
+
+       /*
+        * There might be readahead requests added to the radix trees which
+        * were not yet added to the readahead work queue. We need to start
+        * them and wait for their completion, otherwise we can end up with
+        * use-after-free problems when dropping the last reference on the
+        * readahead extents and their zones, as they need to access the
+        * device structure.
+        */
+       reada_start_machine(fs_info);
+       btrfs_flush_workqueue(fs_info->readahead_workers);
+}
+
+/*
+ * If when removing a device (device replace or device remove ioctls) an error
+ * happens after calling btrfs_reada_remove_dev(), call this to undo what that
+ * function did. This is safe to call even if btrfs_reada_remove_dev() was not
+ * called before.
+ */
+void btrfs_reada_undo_remove_dev(struct btrfs_device *dev)
+{
+       spin_lock(&dev->fs_info->reada_lock);
+       clear_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
+       spin_unlock(&dev->fs_info->reada_lock);
+}
index f0ffd5ee77bd5700847bb580417a3f3242f88630..8784b74f5232e797ceacede9c31bf19e64c9247d 100644 (file)
@@ -760,18 +760,36 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
        u64 type;
        u64 features;
        bool mixed = false;
+       int raid_index;
+       int nparity;
+       int ncopies;
 
        length = btrfs_chunk_length(leaf, chunk);
        stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
        num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
        sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
        type = btrfs_chunk_type(leaf, chunk);
+       raid_index = btrfs_bg_flags_to_raid_index(type);
+       ncopies = btrfs_raid_array[raid_index].ncopies;
+       nparity = btrfs_raid_array[raid_index].nparity;
 
        if (!num_stripes) {
                chunk_err(leaf, chunk, logical,
                          "invalid chunk num_stripes, have %u", num_stripes);
                return -EUCLEAN;
        }
+       if (num_stripes < ncopies) {
+               chunk_err(leaf, chunk, logical,
+                         "invalid chunk num_stripes < ncopies, have %u < %d",
+                         num_stripes, ncopies);
+               return -EUCLEAN;
+       }
+       if (nparity && num_stripes == nparity) {
+               chunk_err(leaf, chunk, logical,
+                         "invalid chunk num_stripes == nparity, have %u == %d",
+                         num_stripes, nparity);
+               return -EUCLEAN;
+       }
        if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
                chunk_err(leaf, chunk, logical,
                "invalid chunk logical, have %llu should aligned to %u",
index 58b9c419a2b6b257b61f43a21dd289560878a55e..b1e48078c3189c9527218ee18872415f6e7b1e33 100644 (file)
@@ -431,7 +431,7 @@ static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info)
 
        atomic_set(&dev->reada_in_flight, 0);
        atomic_set(&dev->dev_stats_ccnt, 0);
-       btrfs_device_data_ordered_init(dev);
+       btrfs_device_data_ordered_init(dev, fs_info);
        INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
        INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
        extent_io_tree_init(fs_info, &dev->alloc_state,
@@ -2099,6 +2099,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 
        mutex_unlock(&uuid_mutex);
        ret = btrfs_shrink_device(device, 0);
+       if (!ret)
+               btrfs_reada_remove_dev(device);
        mutex_lock(&uuid_mutex);
        if (ret)
                goto error_undo;
@@ -2179,6 +2181,7 @@ out:
        return ret;
 
 error_undo:
+       btrfs_reada_undo_remove_dev(device);
        if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                mutex_lock(&fs_info->chunk_mutex);
                list_add(&device->dev_alloc_list,
index bf27ac07d3153912c4683c2c7534970639854e78..232f02bd214fc44e013574357c1e3cd926c6e77f 100644 (file)
@@ -39,10 +39,10 @@ struct btrfs_io_geometry {
 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
 #include <linux/seqlock.h>
 #define __BTRFS_NEED_DEVICE_DATA_ORDERED
-#define btrfs_device_data_ordered_init(device) \
-       seqcount_init(&device->data_seqcount)
+#define btrfs_device_data_ordered_init(device, info)                           \
+       seqcount_mutex_init(&device->data_seqcount, &info->chunk_mutex)
 #else
-#define btrfs_device_data_ordered_init(device) do { } while (0)
+#define btrfs_device_data_ordered_init(device, info) do { } while (0)
 #endif
 
 #define BTRFS_DEV_STATE_WRITEABLE      (0)
@@ -50,6 +50,7 @@ struct btrfs_io_geometry {
 #define BTRFS_DEV_STATE_MISSING                (2)
 #define BTRFS_DEV_STATE_REPLACE_TGT    (3)
 #define BTRFS_DEV_STATE_FLUSH_SENT     (4)
+#define BTRFS_DEV_STATE_NO_READA       (5)
 
 struct btrfs_device {
        struct list_head dev_list; /* device_list_mutex */
@@ -71,7 +72,8 @@ struct btrfs_device {
        blk_status_t last_flush_error;
 
 #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
-       seqcount_t data_seqcount;
+       /* A seqcount_t with associated chunk_mutex (for lockdep) */
+       seqcount_mutex_t data_seqcount;
 #endif
 
        /* the internal btrfs device id */
@@ -162,11 +164,9 @@ btrfs_device_get_##name(const struct btrfs_device *dev)                    \
 static inline void                                                     \
 btrfs_device_set_##name(struct btrfs_device *dev, u64 size)            \
 {                                                                      \
-       preempt_disable();                                              \
        write_seqcount_begin(&dev->data_seqcount);                      \
        dev->name = size;                                               \
        write_seqcount_end(&dev->data_seqcount);                        \
-       preempt_enable();                                               \
 }
 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
 #define BTRFS_DEVICE_GETSET_FUNCS(name)                                        \