btrfs: allow block group background reclaim for non-zoned filesystems
author: Josef Bacik <josef@toxicpanda.com>
Tue, 29 Mar 2022 08:56:07 +0000 (01:56 -0700)
committer: David Sterba <dsterba@suse.com>
Mon, 16 May 2022 15:03:11 +0000 (17:03 +0200)
This will allow us to set a threshold for block groups to be
automatically relocated even if we don't have zoned devices.

We have found this feature invaluable at Facebook due to how our
workload interacts with the allocator.  We have been using this in
production for months with only a single problem that has already been
fixed.

Tested-by: Pankaj Raghav <p.raghav@samsung.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/block-group.c

index aee2171450264ed957937acb929ff2bfc022838f..fb3ce80fd7e4034694c548aa6259705f462ec183 100644 (file)
@@ -3198,6 +3198,31 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
        return ret;
 }
 
+/* Should @bg be queued for reclaim now that @bytes_freed left it? */
+static inline bool should_reclaim_block_group(struct btrfs_block_group *bg,
+                                             u64 bytes_freed)
+{
+       const struct btrfs_space_info *sinfo = bg->space_info;
+       const int pct = READ_ONCE(sinfo->bg_reclaim_threshold);
+       const u64 used_after = bg->used;
+       const u64 used_before = used_after + bytes_freed;
+       u64 limit;
+
+       /* A threshold of zero never selects a block group. */
+       if (!pct)
+               return false;
+
+       limit = div_factor_fine(bg->length, pct);
+
+       /*
+        * Only the free that carries usage across the limit, from at or above
+        * it to below it, triggers reclaim.  A group that was already under
+        * the limit is likely brand new and we don't want to relocate those.
+        */
+       return used_before >= limit &&
+              used_after < limit;
+}
+
 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
                             u64 bytenr, u64 num_bytes, bool alloc)
 {
@@ -3220,6 +3245,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
        spin_unlock(&info->delalloc_root_lock);
 
        while (total) {
+               bool reclaim;
+
                cache = btrfs_lookup_block_group(info, bytenr);
                if (!cache) {
                        ret = -ENOENT;
@@ -3265,6 +3292,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
                                        cache->space_info, num_bytes);
                        cache->space_info->bytes_used -= num_bytes;
                        cache->space_info->disk_used -= num_bytes * factor;
+
+                       reclaim = should_reclaim_block_group(cache, num_bytes);
                        spin_unlock(&cache->lock);
                        spin_unlock(&cache->space_info->lock);
 
@@ -3291,6 +3320,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
                if (!alloc && old_val == 0) {
                        if (!btrfs_test_opt(info, DISCARD_ASYNC))
                                btrfs_mark_bg_unused(cache);
+               } else if (!alloc && reclaim) {
+                       btrfs_mark_bg_to_reclaim(cache);
                }
 
                btrfs_put_block_group(cache);