int ref_mod);
static int update_reserved_extents(struct btrfs_root *root,
u64 bytenr, u64 num, int reserve);
-static int pin_down_bytes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, int is_data);
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc,
} else {
goto out;
}
+ btrfs_unlock_up_safe(path, 1);
btrfs_mark_buffer_dirty(path->nodes[0]);
out:
btrfs_release_path(root, path);
return -ENOMEM;
path->reada = 1;
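+	/*
+	 * have the search leave the path locks spinning; the extent item
+	 * update below is short and does not need blocking locks
+	 */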
+ path->leave_spinning = 1;
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
/* first find the extent item and update its reference count */
ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
path, 0, 1);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_set_path_blocking(path);
return ret;
+ }
if (ret > 0) {
WARN_ON(1);
refs = btrfs_extent_refs(l, item);
btrfs_set_extent_refs(l, item, refs + refs_to_add);
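+	/*
+	 * the nodes above the leaf are not needed any more, so unlock them
+	 * now; only the leaf has to stay locked to be marked dirty
+	 */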
+ btrfs_unlock_up_safe(path, 1);
+
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_release_path(root->fs_info->extent_root, path);
path->reada = 1;
+ path->leave_spinning = 1;
+
/* now insert the actual backref */
ret = insert_extent_backref(trans, root->fs_info->extent_root,
path, bytenr, parent,
return NULL;
}
-/*
- * this starts processing the delayed reference count updates and
- * extent insertions we have queued up so far. count can be
- * 0, which means to process everything in the tree at the start
- * of the run (but not newly added entries), or it can be some target
- * number you'd like to process.
- */
-int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, unsigned long count)
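+/*
+ * run the delayed refs for all of the heads on the given cluster list.
+ * Returns the number of refs that were processed so the caller can track
+ * how much work was done.
+ */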
+static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct list_head *cluster)
{
- struct rb_node *node;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_head *locked_ref = NULL;
int ret;
+ int count = 0;
int must_insert_reserved = 0;
- int run_all = count == (unsigned long)-1;
-
- if (root == root->fs_info->extent_root)
- root = root->fs_info->tree_root;
delayed_refs = &trans->transaction->delayed_refs;
-again:
- spin_lock(&delayed_refs->lock);
- if (count == 0)
- count = delayed_refs->num_entries;
while (1) {
if (!locked_ref) {
- /*
- * no locked ref, go find something we can
- * process in the rbtree. We start at
- * the beginning of the tree, there may be less
- * lock contention if we do something smarter here.
- */
- node = rb_first(&delayed_refs->root);
- if (!node) {
- spin_unlock(&delayed_refs->lock);
+ /* pick a new head ref from the cluster list */
+ if (list_empty(cluster))
break;
- }
- ref = rb_entry(node, struct btrfs_delayed_ref_node,
- rb_node);
- ret = btrfs_lock_delayed_ref(trans, ref, &locked_ref);
- if (ret) {
- spin_unlock(&delayed_refs->lock);
- break;
+ locked_ref = list_entry(cluster->next,
+ struct btrfs_delayed_ref_head, cluster);
+
+ /* grab the lock that says we are going to process
+ * all the refs for this head */
+ ret = btrfs_delayed_ref_lock(trans, locked_ref);
+
+ /*
+ * we may have dropped the spin lock to get the head
+ * mutex lock, and that might have given someone else
+ * time to free the head. If that's true, it has been
+ * removed from our list and we can move on.
+ */
+ if (ret == -EAGAIN) {
+ locked_ref = NULL;
+ count++;
+ continue;
}
}
* locked_ref is the head node, so we have to go one
* node back for any delayed ref updates
*/
-
ref = select_delayed_ref(locked_ref);
if (!ref) {
			/* all delayed refs have been processed, go ahead
			 * and send in the head node itself, so that any
			 * accounting fixes can happen
			 */
ref = &locked_ref->node;
+ list_del_init(&locked_ref->cluster);
locked_ref = NULL;
}
BUG_ON(ret);
btrfs_put_delayed_ref(ref);
- /* once we lock the head ref, we have to process all the
- * entries for it. So, we might end up doing more entries
- * that count was asking us to do.
- */
- if (count > 0)
- count--;
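+		/* every ref we run counts against the caller's budget */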
+ count++;
+ cond_resched();
+ spin_lock(&delayed_refs->lock);
+ }
+ return count;
+}
+
+/*
+ * this starts processing the delayed reference count updates and
+ * extent insertions we have queued up so far. count can be
+ * 0, which means to process everything in the tree at the start
+ * of the run (but not newly added entries), or it can be some target
+ * number you'd like to process.
+ */
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, unsigned long count)
+{
+ struct rb_node *node;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_delayed_ref_node *ref;
+ struct list_head cluster;
+ int ret;
+ int run_all = count == (unsigned long)-1;
+ int run_most = 0;
+
+ if (root == root->fs_info->extent_root)
+ root = root->fs_info->tree_root;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ INIT_LIST_HEAD(&cluster);
+again:
+ spin_lock(&delayed_refs->lock);
+ if (count == 0) {
+ count = delayed_refs->num_entries * 2;
+ run_most = 1;
+ }
+ while (1) {
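+		/*
+		 * unless we were asked to run everything, stop once there
+		 * are fewer than 64 delayed ref heads ready to run
+		 */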
+ if (!(run_all || run_most) &&
+ delayed_refs->num_heads_ready < 64)
+ break;
/*
- * we set locked_ref to null above if we're all done
- * with this bytenr
+		 * go find something we can process in the rbtree.  We start
+		 * at delayed_refs->run_delayed_start and build a cluster of
+		 * refs to process, beginning with the first head we are able
+		 * to lock
*/
- if (!locked_ref && count == 0)
+ ret = btrfs_find_ref_cluster(trans, &cluster,
+ delayed_refs->run_delayed_start);
+ if (ret)
break;
- spin_lock(&delayed_refs->lock);
+ ret = run_clustered_refs(trans, root, &cluster);
+ BUG_ON(ret < 0);
+
+ count -= min_t(unsigned long, ret, count);
+
+ if (count == 0)
+ break;
}
+
if (run_all) {
- spin_lock(&delayed_refs->lock);
node = rb_first(&delayed_refs->root);
- if (!node) {
- spin_unlock(&delayed_refs->lock);
+ if (!node)
goto out;
- }
+ count = (unsigned long)-1;
while (node) {
ref = rb_entry(node, struct btrfs_delayed_ref_node,
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(ref);
+ cond_resched();
goto again;
}
node = rb_next(node);
}
spin_unlock(&delayed_refs->lock);
- count = (unsigned long)-1;
schedule_timeout(1);
goto again;
}
out:
+ spin_unlock(&delayed_refs->lock);
return 0;
}
clear_extent_dirty(&fs_info->pinned_extents,
bytenr, bytenr + num - 1, GFP_NOFS);
}
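+	/*
+	 * callers expect us to drop the pinned mutex once the extent state
+	 * bits are updated; the block group counters below don't need it
+	 */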
+ mutex_unlock(&root->fs_info->pinned_mutex);
+
while (num > 0) {
cache = btrfs_lookup_block_group(fs_info, bytenr);
BUG_ON(!cache);
u64 end;
int ret;
- mutex_lock(&root->fs_info->pinned_mutex);
while (1) {
+ mutex_lock(&root->fs_info->pinned_mutex);
ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY);
if (ret)
ret = btrfs_discard_extent(root, start, end + 1 - start);
+ /* unlocks the pinned mutex */
btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
- if (need_resched()) {
- mutex_unlock(&root->fs_info->pinned_mutex);
- cond_resched();
- mutex_lock(&root->fs_info->pinned_mutex);
- }
+ cond_resched();
}
mutex_unlock(&root->fs_info->pinned_mutex);
return ret;
static int pin_down_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, int is_data)
+ struct btrfs_path *path,
+ u64 bytenr, u64 num_bytes, int is_data,
+ struct extent_buffer **must_clean)
{
int err = 0;
struct extent_buffer *buf;
header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
header_transid == trans->transid &&
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
- clean_tree_block(NULL, root, buf);
- btrfs_tree_unlock(buf);
- free_extent_buffer(buf);
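+			/*
+			 * hand the buffer back; the caller will clean it
+			 * once it can safely block
+			 */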
+ *must_clean = buf;
return 1;
}
btrfs_tree_unlock(buf);
}
free_extent_buffer(buf);
pinit:
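+	/*
+	 * taking the pinned mutex may block, so make sure any spinning
+	 * locks held on the path are set to blocking first
+	 */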
+ btrfs_set_path_blocking(path);
+ mutex_lock(&root->fs_info->pinned_mutex);
+ /* unlocks the pinned mutex */
btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
BUG_ON(err < 0);
return -ENOMEM;
path->reada = 1;
+ path->leave_spinning = 1;
ret = lookup_extent_backref(trans, extent_root, path,
bytenr, parent, root_objectid,
ref_generation, owner_objectid, 1);
refs_to_drop);
BUG_ON(ret);
btrfs_release_path(extent_root, path);
+ path->leave_spinning = 1;
ret = btrfs_search_slot(trans, extent_root,
&key, path, -1, 1);
if (ret) {
/* if refs are 0, we need to setup the path for deletion */
if (refs == 0) {
btrfs_release_path(extent_root, path);
+ path->leave_spinning = 1;
ret = btrfs_search_slot(trans, extent_root, &key, path,
-1, 1);
BUG_ON(ret);
if (refs == 0) {
u64 super_used;
u64 root_used;
+ struct extent_buffer *must_clean = NULL;
if (pin) {
- mutex_lock(&root->fs_info->pinned_mutex);
- ret = pin_down_bytes(trans, root, bytenr, num_bytes,
- owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
- mutex_unlock(&root->fs_info->pinned_mutex);
+ ret = pin_down_bytes(trans, root, path,
+ bytenr, num_bytes,
+ owner_objectid >= BTRFS_FIRST_FREE_OBJECTID,
+ &must_clean);
if (ret > 0)
mark_free = 1;
BUG_ON(ret < 0);
}
+
/* block accounting for super block */
spin_lock(&info->delalloc_lock);
super_used = btrfs_super_bytes_used(&info->super_copy);
btrfs_set_root_used(&root->root_item,
root_used - num_bytes);
spin_unlock(&info->delalloc_lock);
+
+ /*
+ * it is going to be very rare for someone to be waiting
+ * on the block we're freeing. del_items might need to
+ * schedule, so rather than get fancy, just force it
+ * to blocking here
+ */
+ if (must_clean)
+ btrfs_set_lock_blocking(must_clean);
+
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del);
BUG_ON(ret);
btrfs_release_path(extent_root, path);
+ if (must_clean) {
+ clean_tree_block(NULL, root, must_clean);
+ btrfs_tree_unlock(must_clean);
+ free_extent_buffer(must_clean);
+ }
+
if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
BUG_ON(ret);
+ } else {
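+		/* metadata: drop cached btree pages for the range we just freed */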
+ invalidate_mapping_pages(info->btree_inode->i_mapping,
+ bytenr >> PAGE_CACHE_SHIFT,
+ (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
}
ret = update_block_group(trans, root, bytenr, num_bytes, 0,
owner_objectid, pin, pin == 0, refs_to_drop);
}
+/*
+ * when we free an extent, it is possible (and likely) that we free the last
+ * delayed ref for that extent as well. This searches the delayed ref tree for
+ * a given extent, and if there are no other delayed refs to be processed for
+ * it, the head ref is removed from the tree.
+ */
+static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr)
+{
+ struct btrfs_delayed_ref_head *head;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_delayed_ref_node *ref;
+ struct rb_node *node;
+ int ret;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+ head = btrfs_find_delayed_ref_head(trans, bytenr);
+ if (!head)
+ goto out;
+
+ node = rb_prev(&head->node.rb_node);
+ if (!node)
+ goto out;
+
+ ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+
+ /* there are still entries for this ref, we can't drop it */
+ if (ref->bytenr == bytenr)
+ goto out;
+
+ /*
+	 * waiting for the lock here would deadlock. If someone else has it
+	 * locked, they are already in the process of dropping it anyway
+ */
+ if (!mutex_trylock(&head->mutex))
+ goto out;
+
+ /*
+ * at this point we have a head with no other entries. Go
+ * ahead and process it.
+ */
+ head->node.in_tree = 0;
+ rb_erase(&head->node.rb_node, &delayed_refs->root);
+
+ delayed_refs->num_entries--;
+
+ /*
+ * we don't take a ref on the node because we're removing it from the
+ * tree, so we just steal the ref the tree was holding.
+ */
+ delayed_refs->num_heads--;
+ if (list_empty(&head->cluster))
+ delayed_refs->num_heads_ready--;
+
+ list_del_init(&head->cluster);
+ spin_unlock(&delayed_refs->lock);
+
+ ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
+ &head->node, head->must_insert_reserved);
+ BUG_ON(ret);
+ btrfs_put_delayed_ref(&head->node);
+ return 0;
+out:
+ spin_unlock(&delayed_refs->lock);
+ return 0;
+}
+
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID &&
owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
mutex_lock(&root->fs_info->pinned_mutex);
+
+ /* unlocks the pinned mutex */
btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
- mutex_unlock(&root->fs_info->pinned_mutex);
update_reserved_extents(root, bytenr, num_bytes, 0);
ret = 0;
} else {
root_objectid, ref_generation,
owner_objectid,
BTRFS_DROP_DELAYED_REF, 1);
+ BUG_ON(ret);
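+		/*
+		 * the extent we just freed may have been the last thing
+		 * pending for its delayed ref head; try to prune the head
+		 * from the tree right away
+		 */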
+ ret = check_ref_cleanup(trans, root, bytenr);
+ BUG_ON(ret);
}
return ret;
}
path = btrfs_alloc_path();
BUG_ON(!path);
+ path->leave_spinning = 1;
ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
sizes, 2);
BUG_ON(ret);
struct btrfs_path *path;
int i;
int orig_level;
+ int update_count;
struct btrfs_root_item *root_item = &root->root_item;
WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
}
}
while (1) {
+ unsigned long update;
wret = walk_down_tree(trans, root, path, &level);
if (wret > 0)
break;
break;
if (wret < 0)
ret = wret;
- if (trans->transaction->in_commit) {
+ if (trans->transaction->in_commit ||
+ trans->transaction->delayed_refs.flushing) {
ret = -EAGAIN;
break;
}
atomic_inc(&root->fs_info->throttle_gen);
wake_up(&root->fs_info->transaction_throttle);
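+		/*
+		 * run the delayed ref updates this pass has queued up, a
+		 * bounded number of batches at a time
+		 */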
+ for (update_count = 0; update_count < 16; update_count++) {
+ update = trans->delayed_ref_updates;
+ trans->delayed_ref_updates = 0;
+ if (update)
+ btrfs_run_delayed_refs(trans, root, update);
+ else
+ break;
+ }
}
for (i = 0; i <= orig_level; i++) {
if (path->nodes[i]) {
if (!path)
return -ENOMEM;
+ path->leave_spinning = 1;
ret = btrfs_insert_empty_inode(trans, root, path, objectid);
if (ret)
goto out;
extent_root = root->fs_info->extent_root;
- root->fs_info->last_trans_new_blockgroup = trans->transid;
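+	/*
+	 * a new block group forces the tree log to fall back to a full
+	 * transaction commit for this transaction
+	 */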
+ root->fs_info->last_trans_log_full_commit = trans->transid;
cache = kzalloc(sizeof(*cache), GFP_NOFS);
if (!cache)