btrfs: zoned: redirty released extent buffers
author Naohiro Aota <naohiro.aota@wdc.com>
Thu, 4 Feb 2021 10:21:54 +0000 (19:21 +0900)
committer David Sterba <dsterba@suse.com>
Tue, 9 Feb 2021 01:46:04 +0000 (02:46 +0100)
Tree manipulating operations like merging nodes often release
once-allocated tree nodes. Such nodes are cleaned so that pages in the
node are not uselessly written out. On zoned volumes, however, this
optimization blocks subsequent IOs, because cancelling the write-out of
the freed blocks breaks the sequential write order expected by the
device.

Introduce a list of clean and unwritten extent buffers that have been
released in a transaction. Redirty the buffers so that
btree_write_cache_pages() can send proper bios to the devices.

In addition, clear the entire content of the extent buffer so as not to
confuse raw block scanners such as 'btrfs check'. Because the content is
cleared, csum_dirty_buffer() would complain about a bytenr mismatch, so
skip the checks and the checksumming for such buffers by using the newly
introduced buffer flag EXTENT_BUFFER_NO_CHECK.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-log.c
fs/btrfs/zoned.c
fs/btrfs/zoned.h

index 8551b0fc1b22082da57d42ccabfd8c9812dab7df..eb1afd7d89f75fd3446d9b6880320c6ba7ab75a0 100644 (file)
@@ -459,6 +459,12 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec
                return 0;
 
        found_start = btrfs_header_bytenr(eb);
+
+       if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
+               WARN_ON(found_start != 0);
+               return 0;
+       }
+
        /*
         * Please do not consolidate these warnings into a single if.
         * It is useful to know what went wrong.
@@ -4774,6 +4780,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
                                     EXTENT_DIRTY);
        btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents);
 
+       btrfs_free_redirty_list(cur_trans);
+
        cur_trans->state =TRANS_STATE_COMPLETED;
        wake_up(&cur_trans->commit_wait);
 }
index 3f83ca5030511980607a71ff1f45479a3c8ad752..dddcb8513c774d1c81d2325f3495cff97a8f1d87 100644 (file)
@@ -3292,8 +3292,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
                if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
                        ret = check_ref_cleanup(trans, buf->start);
-                       if (!ret)
+                       if (!ret) {
+                               btrfs_redirty_list_add(trans->transaction, buf);
                                goto out;
+                       }
                }
 
                cache = btrfs_lookup_block_group(fs_info, buf->start);
@@ -3304,6 +3306,13 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                        goto out;
                }
 
+               if (btrfs_is_zoned(fs_info)) {
+                       btrfs_redirty_list_add(trans->transaction, buf);
+                       pin_down_extent(trans, cache, buf->start, buf->len, 1);
+                       btrfs_put_block_group(cache);
+                       goto out;
+               }
+
                WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
 
                btrfs_add_free_space(cache, buf->start, buf->len);
@@ -4635,6 +4644,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        __btrfs_tree_lock(buf, nest);
        btrfs_clean_tree_block(buf);
        clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
+       clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
 
        set_extent_buffer_uptodate(buf);
 
index 4be117adda3356640336883452e610c6b56af29b..eedcfb40c356cbd8733aac9d69cbaa17e28928ec 100644 (file)
@@ -25,6 +25,7 @@
 #include "backref.h"
 #include "disk-io.h"
 #include "subpage.h"
+#include "zoned.h"
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
@@ -5182,6 +5183,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
 
        btrfs_leak_debug_add(&fs_info->eb_leak_lock, &eb->leak_list,
                             &fs_info->allocated_ebs);
+       INIT_LIST_HEAD(&eb->release_list);
 
        spin_lock_init(&eb->refs_lock);
        atomic_set(&eb->refs, 1);
@@ -6111,6 +6113,8 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
        char *src = (char *)srcv;
        unsigned long i = get_eb_page_index(start);
 
+       WARN_ON(test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags));
+
        if (check_eb_range(eb, start, len))
                return;
 
index 047b3e66897fd239c511bc6f9f15aecc758f205e..824640cb0ace657959c744a074e50161b7ffe90a 100644 (file)
@@ -31,6 +31,7 @@ enum {
        EXTENT_BUFFER_IN_TREE,
        /* write IO error */
        EXTENT_BUFFER_WRITE_ERR,
+       EXTENT_BUFFER_NO_CHECK,
 };
 
 /* these are flags for __process_pages_contig */
@@ -93,6 +94,7 @@ struct extent_buffer {
        struct rw_semaphore lock;
 
        struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
+       struct list_head release_list;
 #ifdef CONFIG_BTRFS_DEBUG
        struct list_head leak_list;
 #endif
index 00c0680dac3ae68d67d649430970ec6b8ce43bc3..acff6bb49a97b324cce8666c61b1f19b9432e4ca 100644 (file)
@@ -21,6 +21,7 @@
 #include "qgroup.h"
 #include "block-group.h"
 #include "space-info.h"
+#include "zoned.h"
 
 #define BTRFS_ROOT_TRANS_TAG 0
 
@@ -380,6 +381,8 @@ loop:
        spin_lock_init(&cur_trans->dirty_bgs_lock);
        INIT_LIST_HEAD(&cur_trans->deleted_bgs);
        spin_lock_init(&cur_trans->dropped_roots_lock);
+       INIT_LIST_HEAD(&cur_trans->releasing_ebs);
+       spin_lock_init(&cur_trans->releasing_ebs_lock);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
                        IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode);
@@ -2350,6 +2353,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
                goto scrub_continue;
        }
 
+       /*
+        * At this point, we should have written all the tree blocks allocated
+        * in this transaction. So it's now safe to free the redirtied extent
+        * buffers.
+        */
+       btrfs_free_redirty_list(cur_trans);
+
        ret = write_all_supers(fs_info, 0);
        /*
         * the super is written, we can safely allow the tree-loggers
index 935bd6958a8a28f24e17c5ff03f18c8b456006a1..6335716e513ff81743274ca7df78c07951667135 100644 (file)
@@ -93,6 +93,9 @@ struct btrfs_transaction {
         */
        atomic_t pending_ordered;
        wait_queue_head_t pending_wait;
+
+       spinlock_t releasing_ebs_lock;
+       struct list_head releasing_ebs;
 };
 
 #define __TRANS_FREEZABLE      (1U << 0)
index 4c7b283ed2b2509b5f244fbadc910e9ffbaa6a74..c02eeeac439cb678d422cecd25d652aece62c3bd 100644 (file)
@@ -19,6 +19,7 @@
 #include "qgroup.h"
 #include "block-group.h"
 #include "space-info.h"
+#include "zoned.h"
 
 /* magic values for the inode_only field in btrfs_log_inode:
  *
@@ -2752,6 +2753,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                                                free_extent_buffer(next);
                                                return ret;
                                        }
+                                       btrfs_redirty_list_add(
+                                               trans->transaction, next);
                                } else {
                                        if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
                                                clear_extent_buffer_dirty(next);
@@ -3296,6 +3299,9 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
        clear_extent_bits(&log->dirty_log_pages, 0, (u64)-1,
                          EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
        extent_io_tree_release(&log->log_csum_range);
+
+       if (trans && log->node)
+               btrfs_redirty_list_add(trans->transaction, log->node);
        btrfs_put_root(log);
 }
 
index c5f9f4c6f20bd0801c6375c747446bf92b8d2302..1de67d789b83673785f9e81833a9b6f571d50ca6 100644 (file)
@@ -10,6 +10,7 @@
 #include "rcu-string.h"
 #include "disk-io.h"
 #include "block-group.h"
+#include "transaction.h"
 
 /* Maximum number of zones to report per blkdev_report_zones() call */
 #define BTRFS_REPORT_NR_ZONES   4096
@@ -1181,3 +1182,39 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
        /* Should not have any excluded extents. Just in case, though */
        btrfs_free_excluded_extents(cache);
 }
+/*
+ * Redirty a released extent buffer and queue it on the transaction's
+ * releasing_ebs list.
+ *
+ * @trans: transaction whose dirty_pages tree and releasing_ebs list are updated
+ * @eb:    extent buffer that is being released
+ *
+ * Does nothing when the filesystem is not zoned, when the buffer's header
+ * already carries BTRFS_HEADER_FLAG_WRITTEN, or when the buffer is already
+ * linked on a release list (eb->release_list is not empty).
+ *
+ * Otherwise the buffer is marked dirty again, its byte range is flagged
+ * EXTENT_DIRTY in trans->dirty_pages, its whole content is zeroed and
+ * EXTENT_BUFFER_NO_CHECK is set on it. csum_dirty_buffer() returns early for
+ * buffers with that flag, so the zeroed block is not checked or checksummed.
+ *
+ * The buffer is appended to trans->releasing_ebs under releasing_ebs_lock and
+ * eb->refs is incremented; btrfs_free_redirty_list() later calls
+ * free_extent_buffer() on every queued buffer.
+ *
+ * NOTE(review): the list_empty() test runs without releasing_ebs_lock and the
+ * refs increment happens after the lock is dropped - presumably callers
+ * serialize on the buffer itself; confirm against the call sites.
+ */
+void btrfs_redirty_list_add(struct btrfs_transaction *trans,
+                           struct extent_buffer *eb)
+{
+       struct btrfs_fs_info *fs_info = eb->fs_info;
+
+       if (!btrfs_is_zoned(fs_info) ||
+           btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN) ||
+           !list_empty(&eb->release_list))
+               return;
+
+       set_extent_buffer_dirty(eb);
+       set_extent_bits_nowait(&trans->dirty_pages, eb->start,
+                              eb->start + eb->len - 1, EXTENT_DIRTY);
+       memzero_extent_buffer(eb, 0, eb->len);
+       set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
+
+       spin_lock(&trans->releasing_ebs_lock);
+       list_add_tail(&eb->release_list, &trans->releasing_ebs);
+       spin_unlock(&trans->releasing_ebs_lock);
+       atomic_inc(&eb->refs); /* paired with free_extent_buffer() in btrfs_free_redirty_list() */
+}
+/*
+ * Empty the transaction's releasing_ebs list.
+ *
+ * @trans: transaction whose queued extent buffers are released
+ *
+ * While holding releasing_ebs_lock, repeatedly takes the first queued extent
+ * buffer, unlinks it with list_del_init() (so its release_list reads as empty
+ * again) and calls free_extent_buffer() on it. The loop ends when the list is
+ * empty.
+ *
+ * In this patch it is called from btrfs_commit_transaction() just before
+ * write_all_supers(), and from btrfs_cleanup_one_transaction().
+ *
+ * NOTE(review): free_extent_buffer() runs with the spinlock held - presumably
+ * it never sleeps; confirm.
+ */
+void btrfs_free_redirty_list(struct btrfs_transaction *trans)
+{
+       spin_lock(&trans->releasing_ebs_lock);
+       while (!list_empty(&trans->releasing_ebs)) {
+               struct extent_buffer *eb;
+
+               eb = list_first_entry(&trans->releasing_ebs,
+                                     struct extent_buffer, release_list);
+               list_del_init(&eb->release_list);
+               free_extent_buffer(eb);
+       }
+       spin_unlock(&trans->releasing_ebs_lock);
+}
index 37304d1675e6da4cdc41d2b57e5aeed49acd95c5..b250a578e38c4ef562585287fe987306b2519431 100644 (file)
@@ -43,6 +43,9 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
 int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size);
 int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new);
 void btrfs_calc_zone_unusable(struct btrfs_block_group *cache);
+void btrfs_redirty_list_add(struct btrfs_transaction *trans,
+                           struct extent_buffer *eb);
+void btrfs_free_redirty_list(struct btrfs_transaction *trans);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
                                     struct blk_zone *zone)
@@ -126,6 +129,10 @@ static inline int btrfs_load_block_group_zone_info(
 
 static inline void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) { }
 
+static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans,
+                                         struct extent_buffer *eb) { }
+static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { }
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)