}
if (run_all) {
- if (!list_empty(&trans->new_bgs))
- btrfs_create_pending_block_groups(trans);
+ btrfs_create_pending_block_groups(trans);
spin_lock(&delayed_refs->lock);
node = rb_first_cached(&delayed_refs->href_root);
/*
* The cleaner kthread might still be doing iput
* operations. Wait for it to finish so that
- * more space is released.
+ * more space is released. We don't need to
+ * explicitly run the delayed iputs here because
+ * the transaction commit would have woken up
+ * the cleaner.
*/
- mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
- mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+ ret = btrfs_wait_on_delayed_iputs(fs_info);
+ if (ret)
+ return ret;
goto again;
} else {
btrfs_end_transaction(trans);
static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *sinfo, int force)
{
- struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
u64 bytes_used = btrfs_space_info_used(sinfo, false);
u64 thresh;
if (force == CHUNK_ALLOC_FORCE)
return 1;
- /*
- * We need to take into account the global rsv because for all intents
- * and purposes it's used space. Don't worry about locking the
- * global_rsv, it doesn't change except when the transaction commits.
- */
- if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
- bytes_used += calc_global_rsv_need_space(global_rsv);
-
/*
* in limited mode, we want to have some free space up to
* about 1% of the FS size.
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
u64 delalloc_bytes;
- u64 max_reclaim;
+ u64 async_pages;
u64 items;
long time_left;
unsigned long nr_pages;
loops = 0;
while (delalloc_bytes && loops < 3) {
- max_reclaim = min(delalloc_bytes, to_reclaim);
- nr_pages = max_reclaim >> PAGE_SHIFT;
+ nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
+
+ /*
+ * Triggers inode writeback for up to nr_pages. This will invoke
+ * the ->writepages callback and trigger delalloc filling
+ * (btrfs_run_delalloc_range()).
+ */
btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
+
/*
- * We need to wait for the async pages to actually start before
- * we do anything.
+ * We need to wait for the compressed pages to start before
+ * we continue.
*/
- max_reclaim = atomic_read(&fs_info->async_delalloc_pages);
- if (!max_reclaim)
+ async_pages = atomic_read(&fs_info->async_delalloc_pages);
+ if (!async_pages)
goto skip_async;
- if (max_reclaim <= nr_pages)
- max_reclaim = 0;
+ /*
+ * Calculate how many compressed pages we want to be written
+ * before we continue. I.e. if there are more async pages than we
+ * require, wait_event will wait until nr_pages are written.
+ */
+ if (async_pages <= nr_pages)
+ async_pages = 0;
else
- max_reclaim -= nr_pages;
+ async_pages -= nr_pages;
wait_event(fs_info->async_submit_wait,
atomic_read(&fs_info->async_delalloc_pages) <=
- (int)max_reclaim);
+ (int)async_pages);
skip_async:
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets) &&
if (!bytes_needed)
return 0;
- /* See if there is enough pinned space to make this reservation */
- if (__percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes_needed,
- BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
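+ /*
+ * Join the transaction first: the check below needs
+ * trans->transaction->flags, and the enospc path must end the
+ * transaction handle before returning.
+ */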
+ trans = btrfs_join_transaction(fs_info->extent_root);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ /*
+ * See if there is enough pinned space to make this reservation, or if
+ * we have block groups that are going to be freed, allowing us to
+ * possibly do a chunk allocation the next loop through.
+ */
+ if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
+ __percpu_counter_compare(&space_info->total_bytes_pinned,
+ bytes_needed,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
goto commit;
/*
* this reservation.
*/
if (space_info != delayed_rsv->space_info)
- return -ENOSPC;
+ goto enospc;
spin_lock(&delayed_rsv->lock);
reclaim_bytes += delayed_rsv->reserved;
if (__percpu_counter_compare(&space_info->total_bytes_pinned,
bytes_needed,
- BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) {
- return -ENOSPC;
- }
+ BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
+ goto enospc;
commit:
- trans = btrfs_join_transaction(fs_info->extent_root);
- if (IS_ERR(trans))
- return -ENOSPC;
-
return btrfs_commit_transaction(trans);
+enospc:
+ btrfs_end_transaction(trans);
+ return -ENOSPC;
}
/*
btrfs_end_transaction(trans);
break;
case ALLOC_CHUNK:
+ case ALLOC_CHUNK_FORCE:
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
}
ret = do_chunk_alloc(trans,
btrfs_metadata_alloc_profile(fs_info),
- CHUNK_ALLOC_NO_FORCE);
+ (state == ALLOC_CHUNK) ?
+ CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE);
btrfs_end_transaction(trans);
if (ret > 0 || ret == -ENOSPC)
ret = 0;
* bunch of pinned space, so make sure we run the iputs before
* we do our pinned bytes check below.
*/
- mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
btrfs_run_delayed_iputs(fs_info);
- mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
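+ /*
+ * Also wait for any iputs still in flight (e.g. run by the
+ * cleaner) to finish, so the space they release is accounted
+ * before the pinned bytes check.
+ */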
+ btrfs_wait_on_delayed_iputs(fs_info);
ret = may_commit_transaction(fs_info, space_info);
break;
commit_cycles--;
}
+ /*
+ * We don't want to force a chunk allocation until we've tried
+ * pretty hard to reclaim space. Think of the case where we
+ * freed up a bunch of space and so have a lot of pinned space
+ * to reclaim. We would rather use that than possibly create an
+ * underutilized metadata chunk. So if this is our first run
+ * through the flushing state machine, skip ALLOC_CHUNK_FORCE and
+ * commit the transaction. If nothing has changed the next go
+ * around, then we can force a chunk allocation.
+ */
+ if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
+ flush_state++;
+
if (flush_state > COMMIT_TRANS) {
commit_cycles++;
if (commit_cycles > 2) {
return ret;
}
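+/*
+ * Print the size and reserved bytes of the given fs_info block rsv
+ * member, taking its spinlock for a consistent snapshot.
+ */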
+#define DUMP_BLOCK_RSV(fs_info, rsv_name) \
+do { \
+ struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \
+ spin_lock(&__rsv->lock); \
+ btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \
+ __rsv->size, __rsv->reserved); \
+ spin_unlock(&__rsv->lock); \
+} while (0)
+
static void dump_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *info, u64 bytes,
int dump_block_groups)
info->bytes_readonly);
spin_unlock(&info->lock);
+ DUMP_BLOCK_RSV(fs_info, global_block_rsv);
+ DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
+ DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
+ DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
+ DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
+
if (!dump_block_groups)
return;
clean_tree_block(fs_info, buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
- btrfs_set_lock_blocking(buf);
+ btrfs_set_lock_blocking_write(buf);
set_extent_buffer_uptodate(buf);
memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
reada = 1;
}
btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
+ btrfs_set_lock_blocking_write(next);
ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
&wc->refs[level - 1],
return -EIO;
}
btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
+ btrfs_set_lock_blocking_write(next);
}
level--;
if (!path->locks[level]) {
BUG_ON(level == 0);
btrfs_tree_lock(eb);
- btrfs_set_lock_blocking(eb);
+ btrfs_set_lock_blocking_write(eb);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
ret = btrfs_lookup_extent_info(trans, fs_info,
if (!path->locks[level] &&
btrfs_header_generation(eb) == trans->transid) {
btrfs_tree_lock(eb);
- btrfs_set_lock_blocking(eb);
+ btrfs_set_lock_blocking_write(eb);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
}
clean_tree_block(fs_info, eb);
if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
level = btrfs_header_level(root->node);
path->nodes[level] = btrfs_lock_root_node(root);
- btrfs_set_lock_blocking(path->nodes[level]);
+ btrfs_set_lock_blocking_write(path->nodes[level]);
path->slots[level] = 0;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
memset(&wc->update_progress, 0,
level = btrfs_header_level(root->node);
while (1) {
btrfs_tree_lock(path->nodes[level]);
- btrfs_set_lock_blocking(path->nodes[level]);
+ btrfs_set_lock_blocking_write(path->nodes[level]);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
ret = btrfs_lookup_extent_info(trans, fs_info,
{
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
+ u64 sinfo_used;
u64 min_allocable_bytes;
int ret = -ENOSPC;
num_bytes = cache->key.offset - cache->reserved - cache->pinned -
cache->bytes_super - btrfs_block_group_used(&cache->item);
+ sinfo_used = btrfs_space_info_used(sinfo, true);
- if (btrfs_space_info_used(sinfo, true) + num_bytes +
- min_allocable_bytes <= sinfo->total_bytes) {
+ if (sinfo_used + num_bytes + min_allocable_bytes <=
+ sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
cache->ro++;
list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
out:
spin_unlock(&cache->lock);
spin_unlock(&sinfo->lock);
+ if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
+ btrfs_info(cache->fs_info,
+ "unable to make block group %llu ro",
+ cache->key.objectid);
+ btrfs_info(cache->fs_info,
+ "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
+ sinfo_used, num_bytes, min_allocable_bytes);
+ dump_space_info(cache->fs_info, cache->space_info, 0, 0);
+ }
return ret;
}