Btrfs: remove assertion when searching for a key in a node/leaf
[sfrench/cifs-2.6.git] / fs / btrfs / ctree.c
index d92462fe66c829ff7abe4c73f583812fdaca7839..324df36d28bf7d8da2b7f66186819a6e0a92968f 100644 (file)
@@ -13,6 +13,7 @@
 #include "print-tree.h"
 #include "locking.h"
 #include "volumes.h"
+#include "qgroup.h"
 
 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_path *path, int level);
@@ -45,11 +46,18 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
        for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
                if (!p->nodes[i] || !p->locks[i])
                        continue;
-               btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
-               if (p->locks[i] == BTRFS_READ_LOCK)
+               /*
+                * If we currently have a spinning reader or writer lock this
+                * will bump the count of blocking holders and drop the
+                * spinlock.
+                */
+               if (p->locks[i] == BTRFS_READ_LOCK) {
+                       btrfs_set_lock_blocking_read(p->nodes[i]);
                        p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
-               else if (p->locks[i] == BTRFS_WRITE_LOCK)
+               } else if (p->locks[i] == BTRFS_WRITE_LOCK) {
+                       btrfs_set_lock_blocking_write(p->nodes[i]);
                        p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
+               }
        }
 }
 
@@ -968,6 +976,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+static struct extent_buffer *alloc_tree_block_no_bg_flush(
+                                         struct btrfs_trans_handle *trans,
+                                         struct btrfs_root *root,
+                                         u64 parent_start,
+                                         const struct btrfs_disk_key *disk_key,
+                                         int level,
+                                         u64 hint,
+                                         u64 empty_size)
+{
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct extent_buffer *ret;
+
+       /*
+        * Allocate a tree block, with block group flushing disabled for the
+        * duration of the allocation, to avoid a deadlock:
+        * COWing a node/leaf of the extent, chunk, device or free space tree
+        * can allocate a new chunk, and finishing that allocation can trigger
+        * flushing of pending block groups
+        * (btrfs_create_pending_block_groups()), which modifies the extent,
+        * chunk, device and free space trees - so we could deadlock with
+        * ourselves, since we are holding a lock on an extent buffer that
+        * btrfs_create_pending_block_groups() may try to COW later.
+        * For similar reasons we also delay flushing pending block groups
+        * when splitting a leaf or node from one of those trees (we hold a
+        * write lock on it and its parent) and when inserting a new root
+        * node for one of those trees.
+        */
+       if (root == fs_info->extent_root ||
+           root == fs_info->chunk_root ||
+           root == fs_info->dev_root ||
+           root == fs_info->free_space_root)
+               trans->can_flush_pending_bgs = false;
+
+       ret = btrfs_alloc_tree_block(trans, root, parent_start,
+                                    root->root_key.objectid, disk_key, level,
+                                    hint, empty_size);
+       /* Re-enable flushing regardless of the allocation result. */
+       trans->can_flush_pending_bgs = true;
+
+       return ret;
+}
+
 /*
  * does the dirty work in cow of a single block.  The parent block (if
  * supplied) is updated to point to the new cow copy.  The new buffer is marked
@@ -1015,26 +1065,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
                parent_start = parent->start;
 
-       /*
-        * If we are COWing a node/leaf from the extent, chunk or device trees,
-        * make sure that we do not finish block group creation of pending block
-        * groups. We do this to avoid a deadlock.
-        * COWing can result in allocation of a new chunk, and flushing pending
-        * block groups (btrfs_create_pending_block_groups()) can be triggered
-        * when finishing allocation of a new chunk. Creation of a pending block
-        * group modifies the extent, chunk and device trees, therefore we could
-        * deadlock with ourselves since we are holding a lock on an extent
-        * buffer that btrfs_create_pending_block_groups() may try to COW later.
-        */
-       if (root == fs_info->extent_root ||
-           root == fs_info->chunk_root ||
-           root == fs_info->dev_root)
-               trans->can_flush_pending_bgs = false;
-
-       cow = btrfs_alloc_tree_block(trans, root, parent_start,
-                       root->root_key.objectid, &disk_key, level,
-                       search_start, empty_size);
-       trans->can_flush_pending_bgs = true;
+       cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
+                                          level, search_start, empty_size);
        if (IS_ERR(cow))
                return PTR_ERR(cow);
 
@@ -1264,7 +1296,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
                return eb;
 
        btrfs_set_path_blocking(path);
-       btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+       btrfs_set_lock_blocking_read(eb);
 
        if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
                BUG_ON(tm->slot != 0);
@@ -1354,7 +1386,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
                free_extent_buffer(eb_root);
                eb = alloc_dummy_extent_buffer(fs_info, logical);
        } else {
-               btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
+               btrfs_set_lock_blocking_read(eb_root);
                eb = btrfs_clone_extent_buffer(eb_root);
                btrfs_tree_read_unlock_blocking(eb_root);
                free_extent_buffer(eb_root);
@@ -1462,9 +1494,16 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
        search_start = buf->start & ~((u64)SZ_1G - 1);
 
        if (parent)
-               btrfs_set_lock_blocking(parent);
-       btrfs_set_lock_blocking(buf);
+               btrfs_set_lock_blocking_write(parent);
+       btrfs_set_lock_blocking_write(buf);
 
+       /*
+        * Before CoWing this block for later modification, check if it's
+        * the subtree root and do the delayed subtree trace if needed.
+        *
+        * Also, we don't care about the error, as it's handled internally.
+        */
+       btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
        ret = __btrfs_cow_block(trans, root, buf, parent,
                                 parent_slot, cow_ret, search_start, 0);
 
@@ -1558,7 +1597,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
        if (parent_nritems <= 1)
                return 0;
 
-       btrfs_set_lock_blocking(parent);
+       btrfs_set_lock_blocking_write(parent);
 
        for (i = start_slot; i <= end_slot; i++) {
                struct btrfs_key first_key;
@@ -1617,7 +1656,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                        search_start = last_block;
 
                btrfs_tree_lock(cur);
-               btrfs_set_lock_blocking(cur);
+               btrfs_set_lock_blocking_write(cur);
                err = __btrfs_cow_block(trans, root, cur, parent, i,
                                        &cur, search_start,
                                        min(16 * blocksize,
@@ -1832,7 +1871,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                }
 
                btrfs_tree_lock(child);
-               btrfs_set_lock_blocking(child);
+               btrfs_set_lock_blocking_write(child);
                ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
                if (ret) {
                        btrfs_tree_unlock(child);
@@ -1870,7 +1909,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
        if (left) {
                btrfs_tree_lock(left);
-               btrfs_set_lock_blocking(left);
+               btrfs_set_lock_blocking_write(left);
                wret = btrfs_cow_block(trans, root, left,
                                       parent, pslot - 1, &left);
                if (wret) {
@@ -1885,7 +1924,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
        if (right) {
                btrfs_tree_lock(right);
-               btrfs_set_lock_blocking(right);
+               btrfs_set_lock_blocking_write(right);
                wret = btrfs_cow_block(trans, root, right,
                                       parent, pslot + 1, &right);
                if (wret) {
@@ -2048,7 +2087,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
                u32 left_nr;
 
                btrfs_tree_lock(left);
-               btrfs_set_lock_blocking(left);
+               btrfs_set_lock_blocking_write(left);
 
                left_nr = btrfs_header_nritems(left);
                if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
@@ -2103,7 +2142,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
                u32 right_nr;
 
                btrfs_tree_lock(right);
-               btrfs_set_lock_blocking(right);
+               btrfs_set_lock_blocking_write(right);
 
                right_nr = btrfs_header_nritems(right);
                if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
@@ -2505,26 +2544,6 @@ done:
        return ret;
 }
 
-static void key_search_validate(struct extent_buffer *b,
-                               const struct btrfs_key *key,
-                               int level)
-{
-#ifdef CONFIG_BTRFS_ASSERT
-       struct btrfs_disk_key disk_key;
-
-       btrfs_cpu_key_to_disk(&disk_key, key);
-
-       if (level == 0)
-               ASSERT(!memcmp_extent_buffer(b, &disk_key,
-                   offsetof(struct btrfs_leaf, items[0].key),
-                   sizeof(disk_key)));
-       else
-               ASSERT(!memcmp_extent_buffer(b, &disk_key,
-                   offsetof(struct btrfs_node, ptrs[0].key),
-                   sizeof(disk_key)));
-#endif
-}
-
 static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
                      int level, int *prev_cmp, int *slot)
 {
@@ -2533,7 +2552,6 @@ static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
                return *prev_cmp;
        }
 
-       key_search_validate(b, key, level);
        *slot = 0;
 
        return 0;
@@ -2981,6 +2999,8 @@ again:
                 */
                prev_cmp = -1;
                ret = key_search(b, key, level, &prev_cmp, &slot);
+               if (ret < 0)
+                       goto done;
 
                if (level != 0) {
                        int dec = 0;
@@ -3343,8 +3363,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
        else
                btrfs_node_key(lower, &lower_key, 0);
 
-       c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
-                                  &lower_key, level, root->node->start, 0);
+       c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
+                                        root->node->start, 0);
        if (IS_ERR(c))
                return PTR_ERR(c);
 
@@ -3473,8 +3493,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        mid = (c_nritems + 1) / 2;
        btrfs_node_key(c, &disk_key, mid);
 
-       split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
-                       &disk_key, level, c->start, 0);
+       split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
+                                            c->start, 0);
        if (IS_ERR(split))
                return PTR_ERR(split);
 
@@ -3747,7 +3767,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
                return 1;
 
        btrfs_tree_lock(right);
-       btrfs_set_lock_blocking(right);
+       btrfs_set_lock_blocking_write(right);
 
        free_space = btrfs_leaf_free_space(fs_info, right);
        if (free_space < data_size)
@@ -3981,7 +4001,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
                return 1;
 
        btrfs_tree_lock(left);
-       btrfs_set_lock_blocking(left);
+       btrfs_set_lock_blocking_write(left);
 
        free_space = btrfs_leaf_free_space(fs_info, left);
        if (free_space < data_size) {
@@ -4258,8 +4278,8 @@ again:
        else
                btrfs_item_key(l, &disk_key, mid);
 
-       right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
-                       &disk_key, 0, l->start, 0);
+       right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
+                                            l->start, 0);
        if (IS_ERR(right))
                return PTR_ERR(right);
 
@@ -5132,6 +5152,10 @@ again:
                nritems = btrfs_header_nritems(cur);
                level = btrfs_header_level(cur);
                sret = btrfs_bin_search(cur, min_key, level, &slot);
+               if (sret < 0) {
+                       ret = sret;
+                       goto out;
+               }
 
                /* at the lowest level, we're done, setup the path and exit */
                if (level == path->lowest_level) {