Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
[sfrench/cifs-2.6.git] / fs / btrfs / ctree.c
index 6795a713b2052bbd7560590dccbba8b33ca6c717..c3df14ce2cc2c00d232028d1238121ff9c37e35c 100644 (file)
@@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
 static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                                       struct btrfs_root *root,
                                       struct extent_buffer *buf,
-                                      struct extent_buffer *cow)
+                                      struct extent_buffer *cow,
+                                      int *last_ref)
 {
        u64 refs;
        u64 owner;
@@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                        BUG_ON(ret);
                }
                clean_tree_block(trans, root, buf);
+               *last_ref = 1;
        }
        return 0;
 }
@@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        struct btrfs_disk_key disk_key;
        struct extent_buffer *cow;
        int level;
+       int last_ref = 0;
        int unlock_orig = 0;
        u64 parent_start;
 
@@ -442,7 +445,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                            (unsigned long)btrfs_header_fsid(cow),
                            BTRFS_FSID_SIZE);
 
-       update_ref_for_cow(trans, root, buf, cow);
+       update_ref_for_cow(trans, root, buf, cow, &last_ref);
+
+       if (root->ref_cows)
+               btrfs_reloc_cow_block(trans, root, buf, cow);
 
        if (buf == root->node) {
                WARN_ON(parent && parent != buf);
@@ -457,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                extent_buffer_get(cow);
                spin_unlock(&root->node_lock);
 
-               btrfs_free_tree_block(trans, root, buf->start, buf->len,
-                               parent_start, root->root_key.objectid, level);
+               btrfs_free_tree_block(trans, root, buf, parent_start,
+                                     last_ref);
                free_extent_buffer(buf);
                add_root_to_dirty_list(root);
        } else {
@@ -473,8 +479,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                btrfs_set_node_ptr_generation(parent, parent_slot,
                                              trans->transid);
                btrfs_mark_buffer_dirty(parent);
-               btrfs_free_tree_block(trans, root, buf->start, buf->len,
-                               parent_start, root->root_key.objectid, level);
+               btrfs_free_tree_block(trans, root, buf, parent_start,
+                                     last_ref);
        }
        if (unlock_orig)
                btrfs_tree_unlock(buf);
@@ -949,6 +955,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
        return bin_search(eb, key, level, slot);
 }
 
+static void root_add_used(struct btrfs_root *root, u32 size)
+{
+       spin_lock(&root->accounting_lock);
+       btrfs_set_root_used(&root->root_item,
+                           btrfs_root_used(&root->root_item) + size);
+       spin_unlock(&root->accounting_lock);
+}
+
+static void root_sub_used(struct btrfs_root *root, u32 size)
+{
+       spin_lock(&root->accounting_lock);
+       btrfs_set_root_used(&root->root_item,
+                           btrfs_root_used(&root->root_item) - size);
+       spin_unlock(&root->accounting_lock);
+}
+
 /* given a node and slot number, this reads the blocks it points to.  The
  * extent buffer is returned with a reference taken (but unlocked).
  * NULL is returned on error.
@@ -1019,7 +1041,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                btrfs_tree_lock(child);
                btrfs_set_lock_blocking(child);
                ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
-               BUG_ON(ret);
+               if (ret) {
+                       btrfs_tree_unlock(child);
+                       free_extent_buffer(child);
+                       goto enospc;
+               }
 
                spin_lock(&root->node_lock);
                root->node = child;
@@ -1034,11 +1060,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                btrfs_tree_unlock(mid);
                /* once for the path */
                free_extent_buffer(mid);
-               ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
-                                           0, root->root_key.objectid, level);
+
+               root_sub_used(root, mid->len);
+               btrfs_free_tree_block(trans, root, mid, 0, 1);
                /* once for the root ptr */
                free_extent_buffer(mid);
-               return ret;
+               return 0;
        }
        if (btrfs_header_nritems(mid) >
            BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
@@ -1088,23 +1115,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                if (wret < 0 && wret != -ENOSPC)
                        ret = wret;
                if (btrfs_header_nritems(right) == 0) {
-                       u64 bytenr = right->start;
-                       u32 blocksize = right->len;
-
                        clean_tree_block(trans, root, right);
                        btrfs_tree_unlock(right);
-                       free_extent_buffer(right);
-                       right = NULL;
                        wret = del_ptr(trans, root, path, level + 1, pslot +
                                       1);
                        if (wret)
                                ret = wret;
-                       wret = btrfs_free_tree_block(trans, root,
-                                                    bytenr, blocksize, 0,
-                                                    root->root_key.objectid,
-                                                    level);
-                       if (wret)
-                               ret = wret;
+                       root_sub_used(root, right->len);
+                       btrfs_free_tree_block(trans, root, right, 0, 1);
+                       free_extent_buffer(right);
+                       right = NULL;
                } else {
                        struct btrfs_disk_key right_key;
                        btrfs_node_key(right, &right_key, 0);
@@ -1136,21 +1156,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                BUG_ON(wret == 1);
        }
        if (btrfs_header_nritems(mid) == 0) {
-               /* we've managed to empty the middle node, drop it */
-               u64 bytenr = mid->start;
-               u32 blocksize = mid->len;
-
                clean_tree_block(trans, root, mid);
                btrfs_tree_unlock(mid);
-               free_extent_buffer(mid);
-               mid = NULL;
                wret = del_ptr(trans, root, path, level + 1, pslot);
                if (wret)
                        ret = wret;
-               wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
-                                        0, root->root_key.objectid, level);
-               if (wret)
-                       ret = wret;
+               root_sub_used(root, mid->len);
+               btrfs_free_tree_block(trans, root, mid, 0, 1);
+               free_extent_buffer(mid);
+               mid = NULL;
        } else {
                /* update the parent key to reflect our changes */
                struct btrfs_disk_key mid_key;
@@ -1590,7 +1604,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
        btrfs_release_path(NULL, p);
 
        ret = -EAGAIN;
-       tmp = read_tree_block(root, blocknr, blocksize, gen);
+       tmp = read_tree_block(root, blocknr, blocksize, 0);
        if (tmp) {
                /*
                 * If the read above didn't mark this buffer up to date,
@@ -1740,7 +1754,6 @@ again:
                                              p->nodes[level + 1],
                                              p->slots[level + 1], &b);
                        if (err) {
-                               free_extent_buffer(b);
                                ret = err;
                                goto done;
                        }
@@ -2076,6 +2089,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
        if (IS_ERR(c))
                return PTR_ERR(c);
 
+       root_add_used(root, root->nodesize);
+
        memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_nritems(c, 1);
        btrfs_set_header_level(c, level);
@@ -2134,6 +2149,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
        int nritems;
 
        BUG_ON(!path->nodes[level]);
+       btrfs_assert_tree_locked(path->nodes[level]);
        lower = path->nodes[level];
        nritems = btrfs_header_nritems(lower);
        BUG_ON(slot > nritems);
@@ -2202,6 +2218,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        if (IS_ERR(split))
                return PTR_ERR(split);
 
+       root_add_used(root, root->nodesize);
+
        memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_level(split, btrfs_header_level(c));
        btrfs_set_header_bytenr(split, split->start);
@@ -2286,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
        return ret;
 }
 
+/*
+ * min slot controls the lowest index we're willing to push to the
+ * right.  We'll push up to and including min_slot, but no lower
+ */
 static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
                                      struct btrfs_root *root,
                                      struct btrfs_path *path,
                                      int data_size, int empty,
                                      struct extent_buffer *right,
-                                     int free_space, u32 left_nritems)
+                                     int free_space, u32 left_nritems,
+                                     u32 min_slot)
 {
        struct extent_buffer *left = path->nodes[0];
        struct extent_buffer *upper = path->nodes[1];
@@ -2309,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        if (empty)
                nr = 0;
        else
-               nr = 1;
+               nr = max_t(u32, 1, min_slot);
 
        if (path->slots[0] >= left_nritems)
                push_space += data_size;
@@ -2415,6 +2438,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 
        if (left_nritems)
                btrfs_mark_buffer_dirty(left);
+       else
+               clean_tree_block(trans, root, left);
+
        btrfs_mark_buffer_dirty(right);
 
        btrfs_item_key(right, &disk_key, 0);
@@ -2448,10 +2474,14 @@ out_unlock:
  *
  * returns 1 if the push failed because the other node didn't have enough
  * room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf.  It won't
+ * push any slot lower than min_slot
  */
 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
-                          *root, struct btrfs_path *path, int data_size,
-                          int empty)
+                          *root, struct btrfs_path *path,
+                          int min_data_size, int data_size,
+                          int empty, u32 min_slot)
 {
        struct extent_buffer *left = path->nodes[0];
        struct extent_buffer *right;
@@ -2493,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
        if (left_nritems == 0)
                goto out_unlock;
 
-       return __push_leaf_right(trans, root, path, data_size, empty,
-                               right, free_space, left_nritems);
+       return __push_leaf_right(trans, root, path, min_data_size, empty,
+                               right, free_space, left_nritems, min_slot);
 out_unlock:
        btrfs_tree_unlock(right);
        free_extent_buffer(right);
@@ -2504,12 +2534,17 @@ out_unlock:
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items.  The
+ * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
+ * items
  */
 static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct btrfs_path *path, int data_size,
                                     int empty, struct extent_buffer *left,
-                                    int free_space, int right_nritems)
+                                    int free_space, u32 right_nritems,
+                                    u32 max_slot)
 {
        struct btrfs_disk_key disk_key;
        struct extent_buffer *right = path->nodes[0];
@@ -2528,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        slot = path->slots[1];
 
        if (empty)
-               nr = right_nritems;
+               nr = min(right_nritems, max_slot);
        else
-               nr = right_nritems - 1;
+               nr = min(right_nritems - 1, max_slot);
 
        for (i = 0; i < nr; i++) {
                item = btrfs_item_nr(right, i);
@@ -2660,6 +2695,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(left);
        if (right_nritems)
                btrfs_mark_buffer_dirty(right);
+       else
+               clean_tree_block(trans, root, right);
 
        btrfs_item_key(right, &disk_key, 0);
        wret = fixup_low_keys(trans, root, path, &disk_key, 1);
@@ -2669,8 +2706,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        /* then fixup the leaf pointer in the path */
        if (path->slots[0] < push_items) {
                path->slots[0] += old_left_nritems;
-               if (btrfs_header_nritems(path->nodes[0]) == 0)
-                       clean_tree_block(trans, root, path->nodes[0]);
                btrfs_tree_unlock(path->nodes[0]);
                free_extent_buffer(path->nodes[0]);
                path->nodes[0] = left;
@@ -2691,10 +2726,14 @@ out:
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items.  The
+ * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
+ * items
  */
 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
-                         *root, struct btrfs_path *path, int data_size,
-                         int empty)
+                         *root, struct btrfs_path *path, int min_data_size,
+                         int data_size, int empty, u32 max_slot)
 {
        struct extent_buffer *right = path->nodes[0];
        struct extent_buffer *left;
@@ -2740,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
                goto out;
        }
 
-       return __push_leaf_left(trans, root, path, data_size,
-                              empty, left, free_space, right_nritems);
+       return __push_leaf_left(trans, root, path, min_data_size,
+                              empty, left, free_space, right_nritems,
+                              max_slot);
 out:
        btrfs_tree_unlock(left);
        free_extent_buffer(left);
@@ -2833,6 +2873,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+/*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf.  A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ *          A                 B                 C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves.  If all goes well we can avoid the double split
+ * completely.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+                                         struct btrfs_root *root,
+                                         struct btrfs_path *path,
+                                         int data_size)
+{
+       int ret;
+       int progress = 0;
+       int slot;
+       u32 nritems;
+
+       slot = path->slots[0];
+
+       /*
+        * try to push all the items after our slot into the
+        * right leaf
+        */
+       ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+       if (ret < 0)
+               return ret;
+
+       if (ret == 0)
+               progress++;
+
+       nritems = btrfs_header_nritems(path->nodes[0]);
+       /*
+        * our goal is to get our slot at the start or end of a leaf.  If
+        * we've done so we're done
+        */
+       if (path->slots[0] == 0 || path->slots[0] == nritems)
+               return 0;
+
+       if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+               return 0;
+
+       /* try to push all the items before our slot into the next leaf */
+       slot = path->slots[0];
+       ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+       if (ret < 0)
+               return ret;
+
+       if (ret == 0)
+               progress++;
+
+       if (progress)
+               return 0;
+       return 1;
+}
+
 /*
  * split the path's leaf in two, making sure there is at least data_size
  * available for the resulting leaf level of the path.
@@ -2855,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
        int wret;
        int split;
        int num_doubles = 0;
+       int tried_avoid_double = 0;
 
        l = path->nodes[0];
        slot = path->slots[0];
@@ -2863,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
                return -EOVERFLOW;
 
        /* first try to make some room by pushing left and right */
-       if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
-               wret = push_leaf_right(trans, root, path, data_size, 0);
+       if (data_size) {
+               wret = push_leaf_right(trans, root, path, data_size,
+                                      data_size, 0, 0);
                if (wret < 0)
                        return wret;
                if (wret) {
-                       wret = push_leaf_left(trans, root, path, data_size, 0);
+                       wret = push_leaf_left(trans, root, path, data_size,
+                                             data_size, 0, (u32)-1);
                        if (wret < 0)
                                return wret;
                }
@@ -2902,6 +3003,8 @@ again:
                                if (mid != nritems &&
                                    leaf_space_used(l, mid, nritems - mid) +
                                    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+                                       if (data_size && !tried_avoid_double)
+                                               goto push_for_double;
                                        split = 2;
                                }
                        }
@@ -2918,6 +3021,8 @@ again:
                                if (mid != nritems &&
                                    leaf_space_used(l, mid, nritems - mid) +
                                    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+                                       if (data_size && !tried_avoid_double)
+                                               goto push_for_double;
                                        split = 2 ;
                                }
                        }
@@ -2932,10 +3037,10 @@ again:
        right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
                                        root->root_key.objectid,
                                        &disk_key, 0, l->start, 0);
-       if (IS_ERR(right)) {
-               BUG_ON(1);
+       if (IS_ERR(right))
                return PTR_ERR(right);
-       }
+
+       root_add_used(root, root->leafsize);
 
        memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_bytenr(right, right->start);
@@ -2998,6 +3103,13 @@ again:
        }
 
        return ret;
+
+push_for_double:
+       push_for_double_split(trans, root, path, data_size);
+       tried_avoid_double = 1;
+       if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+               return 0;
+       goto again;
 }
 
 static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3054,7 +3166,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
 
        btrfs_set_path_blocking(path);
        ret = split_leaf(trans, root, &key, path, ins_len, 1);
-       BUG_ON(ret);
+       if (ret)
+               goto err;
 
        path->keep_locks = 0;
        btrfs_unlock_up_safe(path, 1);
@@ -3796,9 +3909,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
         */
        btrfs_unlock_up_safe(path, 0);
 
-       ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
-                                   0, root->root_key.objectid, 0);
-       return ret;
+       root_sub_used(root, leaf->len);
+
+       btrfs_free_tree_block(trans, root, leaf, 0, 1);
+       return 0;
 }
 /*
  * delete the item at the leaf level in path.  If that empties
@@ -3865,6 +3979,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                if (leaf == root->node) {
                        btrfs_set_header_level(leaf, 0);
                } else {
+                       btrfs_set_path_blocking(path);
+                       clean_tree_block(trans, root, leaf);
                        ret = btrfs_del_leaf(trans, root, path, leaf);
                        BUG_ON(ret);
                }
@@ -3890,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        extent_buffer_get(leaf);
 
                        btrfs_set_path_blocking(path);
-                       wret = push_leaf_left(trans, root, path, 1, 1);
+                       wret = push_leaf_left(trans, root, path, 1, 1,
+                                             1, (u32)-1);
                        if (wret < 0 && wret != -ENOSPC)
                                ret = wret;
 
                        if (path->nodes[0] == leaf &&
                            btrfs_header_nritems(leaf)) {
-                               wret = push_leaf_right(trans, root, path, 1, 1);
+                               wret = push_leaf_right(trans, root, path, 1,
+                                                      1, 1, 0);
                                if (wret < 0 && wret != -ENOSPC)
                                        ret = wret;
                        }