Merge tag 'for-6.5-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 19 Aug 2023 15:57:07 +0000 (17:57 +0200)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 19 Aug 2023 15:57:07 +0000 (17:57 +0200)
Pull btrfs fixes from David Sterba:

 - fix infinite loop in readdir(), could happen in a big directory when
   files get renamed during enumeration

 - fix extent map handling of skipped pinned ranges

 - fix a corner case when handling ordered extent length

 - fix a potential crash when balance cancel races with pause

 - verify correct uuid when starting scrub or device replace

* tag 'for-6.5-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix incorrect splitting in btrfs_drop_extent_map_range
  btrfs: fix BUG_ON condition in btrfs_cancel_balance
  btrfs: only subtract from len_to_oe_boundary when it is tracking an extent
  btrfs: fix replace/scrub failure with metadata_uuid
  btrfs: fix infinite directory reads

fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-inode.h
fs/btrfs/extent_io.c
fs/btrfs/extent_map.c
fs/btrfs/inode.c
fs/btrfs/scrub.c
fs/btrfs/volumes.c

index f2d2b313bde5ab6c5b24229fa2cfda167b890b43..9419f4e37a58c3d20164d4f6bf9fb447792e45c2 100644 (file)
@@ -443,6 +443,7 @@ struct btrfs_drop_extents_args {
 
 struct btrfs_file_private {
        void *filldir_buf;
+       u64 last_index;
        struct extent_state *llseek_cached_state;
 };
 
index 6b457b010cbc4ae204bc211c521f389ad46ded4c..6d51db066503b87f48da7fc870dd78060cf2c981 100644 (file)
@@ -1632,6 +1632,7 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
 }
 
 bool btrfs_readdir_get_delayed_items(struct inode *inode,
+                                    u64 last_index,
                                     struct list_head *ins_list,
                                     struct list_head *del_list)
 {
@@ -1651,14 +1652,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
 
        mutex_lock(&delayed_node->mutex);
        item = __btrfs_first_delayed_insertion_item(delayed_node);
-       while (item) {
+       while (item && item->index <= last_index) {
                refcount_inc(&item->refs);
                list_add_tail(&item->readdir_list, ins_list);
                item = __btrfs_next_delayed_item(item);
        }
 
        item = __btrfs_first_delayed_deletion_item(delayed_node);
-       while (item) {
+       while (item && item->index <= last_index) {
                refcount_inc(&item->refs);
                list_add_tail(&item->readdir_list, del_list);
                item = __btrfs_next_delayed_item(item);
index 4f21daa3dbc7b306804d49a021e0fcae4f749b3e..dc1085b2a3976668e875d0dbf8b4d7aaf6b22dcd 100644 (file)
@@ -148,6 +148,7 @@ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info);
 
 /* Used for readdir() */
 bool btrfs_readdir_get_delayed_items(struct inode *inode,
+                                    u64 last_index,
                                     struct list_head *ins_list,
                                     struct list_head *del_list);
 void btrfs_readdir_put_delayed_items(struct inode *inode,
index ca765d62324f522e5c0fec3c5f769e818100eb31..90ad3006ef3a7f75ff54f94bf90af65b518cd026 100644 (file)
@@ -902,7 +902,30 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
                size -= len;
                pg_offset += len;
                disk_bytenr += len;
-               bio_ctrl->len_to_oe_boundary -= len;
+
+               /*
+                * len_to_oe_boundary defaults to U32_MAX, which isn't page or
+                * sector aligned.  alloc_new_bio() then sets it to the end of
+                * our ordered extent for writes into zoned devices.
+                *
+                * When len_to_oe_boundary is tracking an ordered extent, we
+                * trust the ordered extent code to align things properly, and
+                * the check above to cap our write to the ordered extent
+                * boundary is correct.
+                *
+                * When len_to_oe_boundary is U32_MAX, the cap above would
+                * result in a 4095 byte IO for the last page right before
+                * we hit the bio limit of UINT_MAX.  bio_add_page() has all
+                * the checks required to make sure we don't overflow the bio,
+                * and we should just ignore len_to_oe_boundary completely
+                * unless we're using it to track an ordered extent.
+                *
+                * It's pretty hard to make a bio sized U32_MAX, but it can
+                * happen when the page cache is able to feed us contiguous
+                * pages for large extents.
+                */
+               if (bio_ctrl->len_to_oe_boundary != U32_MAX)
+                       bio_ctrl->len_to_oe_boundary -= len;
 
                /* Ordered extent boundary: move on to a new bio. */
                if (bio_ctrl->len_to_oe_boundary == 0)
index 0cdb3e86f29b59a362c5eabcc2a37fc61da5607e..a6d8368ed0edd5784301ed40e5bed5e90ca6b60e 100644 (file)
@@ -760,8 +760,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
 
                if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
                        start = em_end;
-                       if (end != (u64)-1)
-                               len = start + len - em_end;
                        goto next;
                }
 
@@ -829,8 +827,8 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
                                if (!split)
                                        goto remove_em;
                        }
-                       split->start = start + len;
-                       split->len = em_end - (start + len);
+                       split->start = end;
+                       split->len = em_end - end;
                        split->block_start = em->block_start;
                        split->flags = flags;
                        split->compress_type = em->compress_type;
index 9055e19b01ef10feecc51fe0f8409e6ccd8463d6..aa090b0b5d298c236a4a942d381a6e2efaab85cc 100644 (file)
@@ -5872,6 +5872,74 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
        return d_splice_alias(inode, dentry);
 }
 
+/*
+ * Find the highest existing sequence number in a directory and then set the
+ * in-memory index_cnt variable to the first free sequence number.
+ */
+static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
+{
+       struct btrfs_root *root = inode->root;
+       struct btrfs_key key, found_key;
+       struct btrfs_path *path;
+       struct extent_buffer *leaf;
+       int ret;
+
+       key.objectid = btrfs_ino(inode);
+       key.type = BTRFS_DIR_INDEX_KEY;
+       key.offset = (u64)-1;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+       /* FIXME: we should be able to handle this */
+       if (ret == 0)
+               goto out;
+       ret = 0;
+
+       if (path->slots[0] == 0) {
+               inode->index_cnt = BTRFS_DIR_START_INDEX;
+               goto out;
+       }
+
+       path->slots[0]--;
+
+       leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+       if (found_key.objectid != btrfs_ino(inode) ||
+           found_key.type != BTRFS_DIR_INDEX_KEY) {
+               inode->index_cnt = BTRFS_DIR_START_INDEX;
+               goto out;
+       }
+
+       inode->index_cnt = found_key.offset + 1;
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
+{
+       if (dir->index_cnt == (u64)-1) {
+               int ret;
+
+               ret = btrfs_inode_delayed_dir_index_count(dir);
+               if (ret) {
+                       ret = btrfs_set_inode_index_count(dir);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       *index = dir->index_cnt;
+
+       return 0;
+}
+
 /*
  * All this infrastructure exists because dir_emit can fault, and we are holding
  * the tree lock when doing readdir.  For now just allocate a buffer and copy
@@ -5884,10 +5952,17 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 static int btrfs_opendir(struct inode *inode, struct file *file)
 {
        struct btrfs_file_private *private;
+       u64 last_index;
+       int ret;
+
+       ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index);
+       if (ret)
+               return ret;
 
        private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
        if (!private)
                return -ENOMEM;
+       private->last_index = last_index;
        private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
        if (!private->filldir_buf) {
                kfree(private);
@@ -5954,7 +6029,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 
        INIT_LIST_HEAD(&ins_list);
        INIT_LIST_HEAD(&del_list);
-       put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
+       put = btrfs_readdir_get_delayed_items(inode, private->last_index,
+                                             &ins_list, &del_list);
 
 again:
        key.type = BTRFS_DIR_INDEX_KEY;
@@ -5972,6 +6048,8 @@ again:
                        break;
                if (found_key.offset < ctx->pos)
                        continue;
+               if (found_key.offset > private->last_index)
+                       break;
                if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
                        continue;
                di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
@@ -6107,57 +6185,6 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
        return dirty ? btrfs_dirty_inode(BTRFS_I(inode)) : 0;
 }
 
-/*
- * find the highest existing sequence number in a directory
- * and then set the in-memory index_cnt variable to reflect
- * free sequence numbers
- */
-static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
-{
-       struct btrfs_root *root = inode->root;
-       struct btrfs_key key, found_key;
-       struct btrfs_path *path;
-       struct extent_buffer *leaf;
-       int ret;
-
-       key.objectid = btrfs_ino(inode);
-       key.type = BTRFS_DIR_INDEX_KEY;
-       key.offset = (u64)-1;
-
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-       if (ret < 0)
-               goto out;
-       /* FIXME: we should be able to handle this */
-       if (ret == 0)
-               goto out;
-       ret = 0;
-
-       if (path->slots[0] == 0) {
-               inode->index_cnt = BTRFS_DIR_START_INDEX;
-               goto out;
-       }
-
-       path->slots[0]--;
-
-       leaf = path->nodes[0];
-       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
-       if (found_key.objectid != btrfs_ino(inode) ||
-           found_key.type != BTRFS_DIR_INDEX_KEY) {
-               inode->index_cnt = BTRFS_DIR_START_INDEX;
-               goto out;
-       }
-
-       inode->index_cnt = found_key.offset + 1;
-out:
-       btrfs_free_path(path);
-       return ret;
-}
-
 /*
  * helper to find a free sequence number in a given directory.  This current
  * code is very simple, later versions will do smarter things in the btree
index 4cae41bd6de0796a5d21af40ecc8135e7534ed75..7289f5bff397dce53710aabc8e0907634395c78b 100644 (file)
@@ -605,7 +605,8 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
                              btrfs_stack_header_bytenr(header), logical);
                return;
        }
-       if (memcmp(header->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE) != 0) {
+       if (memcmp(header->fsid, fs_info->fs_devices->metadata_uuid,
+                  BTRFS_FSID_SIZE) != 0) {
                bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
                bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
                btrfs_warn_rl(fs_info,
index 2ecb76cf3d91f8c0941359ee0c4d6113e1a93a79..6aa9bf3661ac87b9f4f14446a44f4e4c26ac1fef 100644 (file)
@@ -4638,8 +4638,7 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
                }
        }
 
-       BUG_ON(fs_info->balance_ctl ||
-               test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+       ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
        atomic_dec(&fs_info->balance_cancel_req);
        mutex_unlock(&fs_info->balance_mutex);
        return 0;