btrfs: don't use global reserve for chunk allocation
authorJosef Bacik <josef@toxicpanda.com>
Wed, 21 Nov 2018 19:03:08 +0000 (14:03 -0500)
committerDavid Sterba <dsterba@suse.com>
Mon, 25 Feb 2019 13:13:34 +0000 (14:13 +0100)
We've done this forever because of the voodoo around knowing how much
space we have.  However, we have better ways of doing this now, and on
normal file systems we'll easily have a global reserve of 512MiB.  Since
metadata chunks are usually 1GiB, that means we'll allocate metadata
chunks more readily than we need to.  Instead use the actual used amount
when determining if we need to allocate a chunk or not.
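
As a rough, compilable userspace sketch of the decision shape after this
change (the struct, the 80% cutoff, and the names here are simplified
stand-ins, not the kernel's actual threshold math in should_alloc_chunk()):

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t u64;

    struct space_info {
            u64 total_bytes;
            u64 bytes_used;         /* stand-in for btrfs_space_info_used() */
    };

    static int should_alloc_chunk_sketch(const struct space_info *sinfo,
                                         int force)
    {
            u64 thresh;

            if (force)
                    return 1;

            /*
             * Pre-patch, bytes_used was padded with the global reserve
             * here; with a 512MiB reserve and ~1GiB metadata chunks that
             * tripped the threshold far too eagerly.  Post-patch, only
             * real usage counts.
             */
            thresh = sinfo->total_bytes * 8 / 10;   /* illustrative cutoff */
            return sinfo->bytes_used >= thresh;
    }

    int main(void)
    {
            struct space_info si = { .total_bytes = 1024, .bytes_used = 700 };

            printf("alloc? %d\n", should_alloc_chunk_sketch(&si, 0)); /* 0 */
            si.bytes_used = 900;
            printf("alloc? %d\n", should_alloc_chunk_sketch(&si, 0)); /* 1 */
            return 0;
    }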

This has a side effect for mixed block group filesystems, where we are
no longer allocating enough chunks for the data/metadata requirements.
To deal with this, add an ALLOC_CHUNK_FORCE step to the flushing state
machine.  This step will only get used if we've already made a full loop
through the flushing machinery and tried committing the transaction.

If we have, then we can try to force a chunk allocation, since we likely
need it to make progress.  This resolves the issues I was seeing with
the mixed bg tests in xfstests, which failed without the new flushing
state.
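
A compilable toy model of the resulting ordering (the enum values match
the ctree.h hunk below; the driver loop is a hypothetical stand-in for
btrfs_async_reclaim_metadata_space(), and the earlier flush states are
omitted):

    #include <stdio.h>

    enum btrfs_flush_state {
            ALLOC_CHUNK             = 7,
            ALLOC_CHUNK_FORCE       = 8,
            COMMIT_TRANS            = 9,
    };

    int main(void)
    {
            int commit_cycles = 0;
            int flush_state = ALLOC_CHUNK;

            while (commit_cycles <= 1) {
                    /*
                     * First pass: prefer reclaiming pinned space over
                     * carving out a possibly underutilized chunk, so
                     * skip the forced allocation.
                     */
                    if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
                            flush_state++;

                    printf("cycle %d: state %d\n", commit_cycles, flush_state);

                    if (flush_state++ >= COMMIT_TRANS) {
                            commit_cycles++;
                            flush_state = ALLOC_CHUNK;  /* next pass */
                    }
            }
            return 0;       /* prints 7,9 on pass 0; 7,8,9 on pass 1 */
    }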

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
[ merged with patch "add ALLOC_CHUNK_FORCE to the flushing code" ]
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
include/trace/events/btrfs.h

index 6e0fd98c6bd9e72508071d22a40f417412f64f74..9306925b67904e18808a3bdde31225aad453a36f 100644 (file)
@@ -2805,7 +2805,8 @@ enum btrfs_flush_state {
        FLUSH_DELALLOC          =       5,
        FLUSH_DELALLOC_WAIT     =       6,
        ALLOC_CHUNK             =       7,
-       COMMIT_TRANS            =       8,
+       ALLOC_CHUNK_FORCE       =       8,
+       COMMIT_TRANS            =       9,
 };
 
 int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
index 9e74ce2d4cf23cf991058252e1ca3d9e0cbb2871..d637f4c4bcd076138414e9ec779785d808eb8053 100644 (file)
@@ -4399,21 +4399,12 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
 static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
                              struct btrfs_space_info *sinfo, int force)
 {
-       struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
        u64 bytes_used = btrfs_space_info_used(sinfo, false);
        u64 thresh;
 
        if (force == CHUNK_ALLOC_FORCE)
                return 1;
 
-       /*
-        * We need to take into account the global rsv because for all intents
-        * and purposes it's used space.  Don't worry about locking the
-        * global_rsv, it doesn't change except when the transaction commits.
-        */
-       if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
-               bytes_used += calc_global_rsv_need_space(global_rsv);
-
        /*
         * in limited mode, we want to have some free space up to
         * about 1% of the FS size.
@@ -4960,6 +4951,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
                btrfs_end_transaction(trans);
                break;
        case ALLOC_CHUNK:
+       case ALLOC_CHUNK_FORCE:
                trans = btrfs_join_transaction(root);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
@@ -4967,7 +4959,8 @@ static void flush_space(struct btrfs_fs_info *fs_info,
                }
                ret = do_chunk_alloc(trans,
                                     btrfs_metadata_alloc_profile(fs_info),
-                                    CHUNK_ALLOC_NO_FORCE);
+                                    (state == ALLOC_CHUNK) ?
+                                     CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE);
                btrfs_end_transaction(trans);
                if (ret > 0 || ret == -ENOSPC)
                        ret = 0;
@@ -5111,6 +5104,19 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
                                commit_cycles--;
                }
 
+               /*
+                * We don't want to force a chunk allocation until we've tried
+                * pretty hard to reclaim space.  Think of the case where we
+                * freed up a bunch of space and so have a lot of pinned space
+                * to reclaim.  We would rather use that than possibly create an
+                * underutilized metadata chunk.  So if this is our first run
+                * through the flushing state machine skip ALLOC_CHUNK_FORCE and
+                * commit the transaction.  If nothing has changed the next go
+                * around then we can force a chunk allocation.
+                */
+               if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
+                       flush_state++;
+
                if (flush_state > COMMIT_TRANS) {
                        commit_cycles++;
                        if (commit_cycles > 2) {
index 2887503e4d128fd3ab9caaea8f55cfc16bf07006..3f08b652363b2232290638d6a1a8b975a4409c4e 100644 (file)
@@ -1051,6 +1051,7 @@ TRACE_EVENT(btrfs_trigger_flush,
                { FLUSH_DELAYED_REFS_NR,        "FLUSH_DELAYED_REFS_NR"},       \
                { FLUSH_DELAYED_REFS,           "FLUSH_ELAYED_REFS"},           \
                { ALLOC_CHUNK,                  "ALLOC_CHUNK"},                 \
+               { ALLOC_CHUNK_FORCE,            "ALLOC_CHUNK_FORCE"},           \
                { COMMIT_TRANS,                 "COMMIT_TRANS"})
 
 TRACE_EVENT(btrfs_flush_space,