btrfs: qgroup: Only trace data extents in leaves if we're relocating data block group
authorQu Wenruo <wqu@suse.com>
Thu, 27 Sep 2018 06:42:35 +0000 (14:42 +0800)
committerDavid Sterba <dsterba@suse.com>
Mon, 15 Oct 2018 15:23:36 +0000 (17:23 +0200)
For qgroup_trace_extent_swap(), if we find one leaf that needs to be
traced, we will also iterate all file extents and trace them.

This is OK if we're relocating data block groups, but if we're
relocating metadata block groups, the balance code itself has already
ensured that the corresponding subtrees of the file tree and the reloc
tree contain the same contents.

That is to say, if we're relocating metadata block groups, all file
extents in the reloc and file trees should match, so there is no need
to trace them.
This should reduce the total number of dirty extents processed in metadata
block group balance.

[[Benchmark]] (with all previous enhancements)
Hardware:
VM 4G vRAM, 8 vCPUs,
disk is using 'unsafe' cache mode,
backing device is SAMSUNG 850 evo SSD.
Host has 16G ram.

Mkfs parameter:
--nodesize 4K (To bump up tree size)

Initial subvolume contents:
4G data copied from /usr and /lib.
(With enough regular small files)

Snapshots:
16 snapshots of the original subvolume.
each snapshot has 3 random files modified.

balance parameter:
-m

So the content should be pretty similar to a real world root fs layout.

                     | v4.19-rc1    | w/ patchset    | diff (*)
---------------------------------------------------------------
relocated extents    | 22929        | 22851          | -0.3%
qgroup dirty extents | 227757       | 140886         | -38.1%
time (sys)           | 65.253s      | 37.464s        | -42.6%
time (real)          | 74.032s      | 44.722s        | -39.6%

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h
fs/btrfs/relocation.c

index 6b35b34810854a77d2d5ef070ff5317e23f0ad22..ac9690f36a94b34084f116b352530fdbdd952ef3 100644 (file)
@@ -1761,7 +1761,8 @@ static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
 static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
                                    struct extent_buffer *src_eb,
                                    struct btrfs_path *dst_path,
-                                   int dst_level, int root_level)
+                                   int dst_level, int root_level,
+                                   bool trace_leaf)
 {
        struct btrfs_key key;
        struct btrfs_path *src_path;
@@ -1863,7 +1864,7 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
                goto out;
 
        /* Record leaf file extents */
-       if (dst_level == 0) {
+       if (dst_level == 0 && trace_leaf) {
                ret = btrfs_qgroup_trace_leaf_items(trans, src_path->nodes[0]);
                if (ret < 0)
                        goto out;
@@ -1900,7 +1901,7 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
                                           struct extent_buffer *src_eb,
                                           struct btrfs_path *dst_path,
                                           int cur_level, int root_level,
-                                          u64 last_snapshot)
+                                          u64 last_snapshot, bool trace_leaf)
 {
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct extent_buffer *eb;
@@ -1972,7 +1973,7 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
 
        /* Now record this tree block and its counter part for qgroups */
        ret = qgroup_trace_extent_swap(trans, src_eb, dst_path, cur_level,
-                                      root_level);
+                                      root_level, trace_leaf);
        if (ret < 0)
                goto cleanup;
 
@@ -1989,7 +1990,7 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
                        /* Recursive call (at most 7 times) */
                        ret = qgroup_trace_new_subtree_blocks(trans, src_eb,
                                        dst_path, cur_level - 1, root_level,
-                                       last_snapshot);
+                                       last_snapshot, trace_leaf);
                        if (ret < 0)
                                goto cleanup;
                }
@@ -2028,6 +2029,7 @@ out:
  * @dst_parent, @dst_slot: pointer to dst (reloc tree) eb.
  */
 int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
+                               struct btrfs_block_group_cache *bg_cache,
                                struct extent_buffer *src_parent, int src_slot,
                                struct extent_buffer *dst_parent, int dst_slot,
                                u64 last_snapshot)
@@ -2037,6 +2039,7 @@ int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
        struct btrfs_key first_key;
        struct extent_buffer *src_eb = NULL;
        struct extent_buffer *dst_eb = NULL;
+       bool trace_leaf = false;
        u64 child_gen;
        u64 child_bytenr;
        int level;
@@ -2055,6 +2058,12 @@ int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
                return -EUCLEAN;
        }
 
+       /*
+        * Only trace leaf if we're relocating data block groups, this could
+        * reduce tons of data extents tracing for meta/sys bg relocation.
+        */
+       if (bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)
+               trace_leaf = true;
        /* Read out real @src_eb, pointed by @src_parent and @src_slot */
        child_bytenr = btrfs_node_blockptr(src_parent, src_slot);
        child_gen = btrfs_node_ptr_generation(src_parent, src_slot);
@@ -2099,7 +2108,7 @@ int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
 
        /* Do the generation-aware breadth-first search */
        ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level,
-                                             level, last_snapshot);
+                                             level, last_snapshot, trace_leaf);
        if (ret < 0)
                goto out;
        ret = 0;
index 1aaf4c2769008137282dca87ac948237bf4a2241..80ebeb3ab5ba01ac274e63e4cf7dd2e7c75427b3 100644 (file)
@@ -238,6 +238,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
                               u64 root_gen, int root_level);
 
 int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
+                               struct btrfs_block_group_cache *bg_cache,
                                struct extent_buffer *src_parent, int src_slot,
                                struct extent_buffer *dst_parent, int dst_slot,
                                u64 last_snapshot);
index d10357122aa1cf22f4d7b6b8addbaea1a6911f77..3e6e3d93caad89ad5d2a6a829376e9821a2d96f0 100644 (file)
@@ -1744,7 +1744,7 @@ int memcmp_node_keys(struct extent_buffer *eb, int slot,
  * errors, a negative error number is returned.
  */
 static noinline_for_stack
-int replace_path(struct btrfs_trans_handle *trans,
+int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
                 struct btrfs_root *dest, struct btrfs_root *src,
                 struct btrfs_path *path, struct btrfs_key *next_key,
                 int lowest_level, int max_level)
@@ -1888,9 +1888,9 @@ again:
                 *    and tree block numbers, if current trans doesn't free
                 *    data reloc tree inode.
                 */
-               ret = btrfs_qgroup_trace_subtree_swap(trans, parent, slot,
-                               path->nodes[level], path->slots[level],
-                               last_snapshot);
+               ret = btrfs_qgroup_trace_subtree_swap(trans, rc->block_group,
+                               parent, slot, path->nodes[level],
+                               path->slots[level], last_snapshot);
                if (ret < 0)
                        break;
 
@@ -2209,7 +2209,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
                    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
                        ret = 0;
                } else {
-                       ret = replace_path(trans, root, reloc_root, path,
+                       ret = replace_path(trans, rc, root, reloc_root, path,
                                           &next_key, level, max_level);
                }
                if (ret < 0) {