btrfs: don't use global reserve for chunk allocation
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f72935646fb154903abb0fd5f6863ad98194e117..d637f4c4bcd076138414e9ec779785d808eb8053 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4399,21 +4399,12 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
 static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
                              struct btrfs_space_info *sinfo, int force)
 {
-       struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
        u64 bytes_used = btrfs_space_info_used(sinfo, false);
        u64 thresh;
 
        if (force == CHUNK_ALLOC_FORCE)
                return 1;
 
-       /*
-        * We need to take into account the global rsv because for all intents
-        * and purposes it's used space.  Don't worry about locking the
-        * global_rsv, it doesn't change except when the transaction commits.
-        */
-       if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
-               bytes_used += calc_global_rsv_need_space(global_rsv);
-
        /*
         * in limited mode, we want to have some free space up to
         * about 1% of the FS size.
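
With the global reserve no longer folded into bytes_used, should_alloc_chunk()
reduces to "allocate a new chunk once actual usage crosses a threshold". A
minimal userspace sketch of that shape follows; the function name, the bool
signature, and the 80% cutoff are illustrative stand-ins, not the kernel's
exact values:

#include <stdbool.h>
#include <stdint.h>

/*
 * Hypothetical stand-in for should_alloc_chunk(): no reserve padding,
 * just real usage compared against a threshold.
 */
static bool should_alloc_chunk_sketch(uint64_t bytes_used,
                                      uint64_t total_bytes, bool force)
{
        if (force)
                return true;
        /* Allocate once roughly 80% of existing chunk space is consumed. */
        return bytes_used >= total_bytes / 5 * 4;
}
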
@@ -4865,10 +4856,19 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
        if (!bytes_needed)
                return 0;
 
-       /* See if there is enough pinned space to make this reservation */
-       if (__percpu_counter_compare(&space_info->total_bytes_pinned,
-                                  bytes_needed,
-                                  BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
+       trans = btrfs_join_transaction(fs_info->extent_root);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       /*
+        * See if there is enough pinned space to make this reservation, or if
+        * we have block groups that are going to be freed, allowing us to
+        * possibly do a chunk allocation the next loop through.
+        */
+       if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
+           __percpu_counter_compare(&space_info->total_bytes_pinned,
+                                    bytes_needed,
+                                    BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
                goto commit;
 
        /*
@@ -4876,7 +4876,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
         * this reservation.
         */
        if (space_info != delayed_rsv->space_info)
-               return -ENOSPC;
+               goto enospc;
 
        spin_lock(&delayed_rsv->lock);
        reclaim_bytes += delayed_rsv->reserved;
@@ -4891,16 +4891,14 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
 
        if (__percpu_counter_compare(&space_info->total_bytes_pinned,
                                   bytes_needed,
-                                  BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) {
-               return -ENOSPC;
-       }
+                                  BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
+               goto enospc;
 
 commit:
-       trans = btrfs_join_transaction(fs_info->extent_root);
-       if (IS_ERR(trans))
-               return -ENOSPC;
-
        return btrfs_commit_transaction(trans);
+enospc:
+       btrfs_end_transaction(trans);
+       return -ENOSPC;
 }
 
 /*
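
The restructuring of may_commit_transaction() above is the usual goto-unwind
shape: take the transaction handle once at the top, then route every failure
path through a single label that ends it, so the handle can never leak. On
success, btrfs_commit_transaction() consumes the handle itself. A
self-contained sketch of the pattern, with placeholder resource_get(),
resource_put(), and resource_consume() helpers standing in for
btrfs_join_transaction(), btrfs_end_transaction(), and
btrfs_commit_transaction():

#include <errno.h>
#include <stdlib.h>

struct resource { int ready; };

static struct resource *resource_get(void)
{
        return calloc(1, sizeof(struct resource));
}

static void resource_put(struct resource *res)
{
        free(res);
}

/* A succeeding consumer takes ownership, like btrfs_commit_transaction(). */
static int resource_consume(struct resource *res)
{
        free(res);
        return 0;
}

static int do_flush(int have_space)
{
        struct resource *res = resource_get();

        if (!res)
                return -ENOMEM;

        if (!have_space)
                goto enospc;

        return resource_consume(res);
enospc:
        resource_put(res);
        return -ENOSPC;
}
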
@@ -4953,6 +4951,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
                btrfs_end_transaction(trans);
                break;
        case ALLOC_CHUNK:
+       case ALLOC_CHUNK_FORCE:
                trans = btrfs_join_transaction(root);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
@@ -4960,7 +4959,8 @@ static void flush_space(struct btrfs_fs_info *fs_info,
                }
                ret = do_chunk_alloc(trans,
                                     btrfs_metadata_alloc_profile(fs_info),
-                                    CHUNK_ALLOC_NO_FORCE);
+                                    (state == ALLOC_CHUNK) ?
+                                     CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE);
                btrfs_end_transaction(trans);
                if (ret > 0 || ret == -ENOSPC)
                        ret = 0;
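
The new ALLOC_CHUNK_FORCE state shares the ALLOC_CHUNK case body via
fallthrough, with the state value selecting the force mode passed to
do_chunk_alloc(). The dispatch shape, in an illustrative standalone form
(the enums and function names here are sketches, not the kernel's):

enum flush_state_sketch { ALLOC, ALLOC_FORCE, COMMIT };
enum alloc_mode_sketch { NO_FORCE, FORCE };

static int alloc_chunk_sketch(enum alloc_mode_sketch mode)
{
        return mode == FORCE ? 1 : 0;   /* placeholder body */
}

static int flush_space_sketch(enum flush_state_sketch state)
{
        switch (state) {
        case ALLOC:
        case ALLOC_FORCE:
                /* One handler, two states: the state picks the mode. */
                return alloc_chunk_sketch(state == ALLOC ? NO_FORCE : FORCE);
        default:
                return 0;
        }
}
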
@@ -5104,6 +5104,19 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
                                commit_cycles--;
                }
 
+               /*
+                * We don't want to force a chunk allocation until we've tried
+                * pretty hard to reclaim space.  Think of the case where we
+                * freed up a bunch of space and so have a lot of pinned space
+                * to reclaim.  We would rather use that than possibly create an
+                * underutilized metadata chunk.  So if this is our first run
+                * through the flushing state machine, skip ALLOC_CHUNK_FORCE and
+                * commit the transaction.  If nothing has changed the next go
+                * around, then we can force a chunk allocation.
+                */
+               if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
+                       flush_state++;
+
                if (flush_state > COMMIT_TRANS) {
                        commit_cycles++;
                        if (commit_cycles > 2) {
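
The skip itself is just a conditional extra increment of flush_state inside
the reclaim loop: on the first pass (no commit cycles yet), the state machine
steps over the forced allocation and goes straight to the commit. Roughly,
with illustrative state names and a placeholder loop condition:

enum { FLUSH_FIRST = 1, ALLOC_FORCE_STATE, COMMIT_STATE };

static void reclaim_loop_sketch(int space_still_needed)
{
        int flush_state = FLUSH_FIRST;
        int commit_cycles = 0;

        do {
                /* flush_space(flush_state) would run here */
                flush_state++;

                /* First pass: prefer cheaper reclaim over a forced chunk. */
                if (flush_state == ALLOC_FORCE_STATE && !commit_cycles)
                        flush_state++;

                if (flush_state > COMMIT_STATE) {
                        commit_cycles++;
                        if (commit_cycles > 2)
                                break;  /* out of tricks, give up */
                        flush_state = FLUSH_FIRST;
                }
        } while (space_still_needed);
}
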
@@ -8079,6 +8092,15 @@ loop:
        return ret;
 }
 
+#define DUMP_BLOCK_RSV(fs_info, rsv_name)                              \
+do {                                                                   \
+       struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name;           \
+       spin_lock(&__rsv->lock);                                        \
+       btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu",      \
+                  __rsv->size, __rsv->reserved);                       \
+       spin_unlock(&__rsv->lock);                                      \
+} while (0)
+
 static void dump_space_info(struct btrfs_fs_info *fs_info,
                            struct btrfs_space_info *info, u64 bytes,
                            int dump_block_groups)
@@ -8098,6 +8120,12 @@ static void dump_space_info(struct btrfs_fs_info *fs_info,
                info->bytes_readonly);
        spin_unlock(&info->lock);
 
+       DUMP_BLOCK_RSV(fs_info, global_block_rsv);
+       DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
+       DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
+       DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
+       DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
+
        if (!dump_block_groups)
                return;
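
The DUMP_BLOCK_RSV() helper leans on two standard C macro idioms: a
do { } while (0) wrapper so the expansion behaves as a single statement
(safe in an unbraced if), and the # stringizing operator so the member name
doubles as the log prefix. A userspace analog, with the spin_lock dropped
and made-up struct names:

#include <stdio.h>

struct rsv_sketch { unsigned long long size, reserved; };
struct fs_sketch { struct rsv_sketch global_rsv, trans_rsv; };

/* Same shape as DUMP_BLOCK_RSV(): one statement, member name stringized. */
#define DUMP_RSV_SKETCH(fs, rsv_name)                                  \
do {                                                                   \
        struct rsv_sketch *__rsv = &(fs)->rsv_name;                    \
        printf(#rsv_name ": size %llu reserved %llu\n",                \
               __rsv->size, __rsv->reserved);                          \
} while (0)

int main(void)
{
        struct fs_sketch fs = { { 536870912, 4096 }, { 0, 0 } };

        if (fs.global_rsv.size)
                DUMP_RSV_SKETCH(&fs, global_rsv);  /* no braces needed */
        DUMP_RSV_SKETCH(&fs, trans_rsv);
        return 0;
}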