Btrfs: remove btrfs_init_path
[sfrench/cifs-2.6.git] fs/btrfs/ctree.c
1 /*
2  * Copyright (C) 2007,2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24 #include "locking.h"
25
26 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
27                       *root, struct btrfs_path *path, int level);
28 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
29                       *root, struct btrfs_key *ins_key,
30                       struct btrfs_path *path, int data_size, int extend);
31 static int push_node_left(struct btrfs_trans_handle *trans,
32                           struct btrfs_root *root, struct extent_buffer *dst,
33                           struct extent_buffer *src, int empty);
34 static int balance_node_right(struct btrfs_trans_handle *trans,
35                               struct btrfs_root *root,
36                               struct extent_buffer *dst_buf,
37                               struct extent_buffer *src_buf);
38 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39                    struct btrfs_path *path, int level, int slot);
40
41 struct btrfs_path *btrfs_alloc_path(void)
42 {
43         struct btrfs_path *path;
44         path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
45         if (path)
46                 path->reada = 1;
47         return path;
48 }
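/*
 * Typical path lifecycle (illustrative sketch only; error handling trimmed):
 *
 *        struct btrfs_path *path = btrfs_alloc_path();
 *        if (!path)
 *                return -ENOMEM;
 *        ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
 *        ... look at path->nodes[0] / path->slots[0] ...
 *        btrfs_free_path(path);
 *
 * btrfs_free_path drops any locks and extent buffer references the path
 * still holds (see btrfs_release_path below).
 */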
49
50 /*
51  * set all locked nodes in the path to blocking locks.  This should
52  * be done before scheduling
53  */
54 noinline void btrfs_set_path_blocking(struct btrfs_path *p)
55 {
56         int i;
57         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
58                 if (p->nodes[i] && p->locks[i])
59                         btrfs_set_lock_blocking(p->nodes[i]);
60         }
61 }
62
63 /*
64  * reset all the locked nodes in the path to spinning locks.
65  */
66 noinline void btrfs_clear_path_blocking(struct btrfs_path *p)
67 {
68         int i;
69         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
70                 if (p->nodes[i] && p->locks[i])
71                         btrfs_clear_lock_blocking(p->nodes[i]);
72         }
73 }
74
75 /* this also releases the path */
76 void btrfs_free_path(struct btrfs_path *p)
77 {
78         btrfs_release_path(NULL, p);
79         kmem_cache_free(btrfs_path_cachep, p);
80 }
81
82 /*
83  * path release drops references on the extent buffers in the path
84  * and it drops any locks held by this path
85  *
86  * It is safe to call this on paths where no locks or extent buffers are held.
87  */
88 noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
89 {
90         int i;
91
92         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
93                 p->slots[i] = 0;
94                 if (!p->nodes[i])
95                         continue;
96                 if (p->locks[i]) {
97                         btrfs_tree_unlock(p->nodes[i]);
98                         p->locks[i] = 0;
99                 }
100                 free_extent_buffer(p->nodes[i]);
101                 p->nodes[i] = NULL;
102         }
103 }
104
105 /*
106  * safely gets a reference on the root node of a tree.  A lock
107  * is not taken, so a concurrent writer may put a different node
108  * at the root of the tree.  See btrfs_lock_root_node for the
109  * looping required.
110  *
111  * The extent buffer returned by this has a reference taken, so
112  * it won't disappear.  It may stop being the root of the tree
113  * at any time because there are no locks held.
114  */
115 struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
116 {
117         struct extent_buffer *eb;
118         spin_lock(&root->node_lock);
119         eb = root->node;
120         extent_buffer_get(eb);
121         spin_unlock(&root->node_lock);
122         return eb;
123 }
124
125 /* loop around taking references on and locking the root node of the
126  * tree until you end up with a lock on the root.  A locked buffer
127  * is returned, with a reference held.
128  */
129 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
130 {
131         struct extent_buffer *eb;
132
133         while (1) {
134                 eb = btrfs_root_node(root);
135                 btrfs_tree_lock(eb);
136
137                 spin_lock(&root->node_lock);
138                 if (eb == root->node) {
139                         spin_unlock(&root->node_lock);
140                         break;
141                 }
142                 spin_unlock(&root->node_lock);
143
144                 btrfs_tree_unlock(eb);
145                 free_extent_buffer(eb);
146         }
147         return eb;
148 }
149
150 /* cowonly roots (everything not a reference counted cow subvolume) just get
151  * put onto a simple dirty list.  transaction.c walks this to make sure they
152  * get properly updated on disk.
153  */
154 static void add_root_to_dirty_list(struct btrfs_root *root)
155 {
156         if (root->track_dirty && list_empty(&root->dirty_list)) {
157                 list_add(&root->dirty_list,
158                          &root->fs_info->dirty_cowonly_roots);
159         }
160 }
161
162 /*
163  * used by snapshot creation to make a copy of a root for a tree with
164  * a given objectid.  The buffer with the new root node is returned in
165  * cow_ret, and this func returns zero on success or a negative error code.
166  */
167 int btrfs_copy_root(struct btrfs_trans_handle *trans,
168                       struct btrfs_root *root,
169                       struct extent_buffer *buf,
170                       struct extent_buffer **cow_ret, u64 new_root_objectid)
171 {
172         struct extent_buffer *cow;
173         u32 nritems;
174         int ret = 0;
175         int level;
176         struct btrfs_root *new_root;
177
178         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
179         if (!new_root)
180                 return -ENOMEM;
181
182         memcpy(new_root, root, sizeof(*new_root));
183         new_root->root_key.objectid = new_root_objectid;
184
185         WARN_ON(root->ref_cows && trans->transid !=
186                 root->fs_info->running_transaction->transid);
187         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
188
189         level = btrfs_header_level(buf);
190         nritems = btrfs_header_nritems(buf);
191
192         cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
193                                      new_root_objectid, trans->transid,
194                                      level, buf->start, 0);
195         if (IS_ERR(cow)) {
196                 kfree(new_root);
197                 return PTR_ERR(cow);
198         }
199
200         copy_extent_buffer(cow, buf, 0, 0, cow->len);
201         btrfs_set_header_bytenr(cow, cow->start);
202         btrfs_set_header_generation(cow, trans->transid);
203         btrfs_set_header_owner(cow, new_root_objectid);
204         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
205
206         write_extent_buffer(cow, root->fs_info->fsid,
207                             (unsigned long)btrfs_header_fsid(cow),
208                             BTRFS_FSID_SIZE);
209
210         WARN_ON(btrfs_header_generation(buf) > trans->transid);
211         ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
212         kfree(new_root);
213
214         if (ret)
215                 return ret;
216
217         btrfs_mark_buffer_dirty(cow);
218         *cow_ret = cow;
219         return 0;
220 }
221
222 /*
223  * does the dirty work in cow of a single block.  The parent block (if
224  * supplied) is updated to point to the new cow copy.  The new buffer is marked
225  * dirty and returned locked.  If you modify the block it needs to be marked
226  * dirty again.
227  *
228  * search_start -- an allocation hint for the new block
229  *
230  * empty_size -- a hint that you plan on doing more cow.  This is the size in
231  * bytes the allocator should try to find free next to the block it returns.
232  * This is just a hint and may be ignored by the allocator.
233  *
234  * prealloc_dest -- if you have already reserved a destination for the cow,
235  * this uses that block instead of allocating a new one.
236  * btrfs_alloc_reserved_extent is used to finish the allocation.
237  */
238 static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
239                              struct btrfs_root *root,
240                              struct extent_buffer *buf,
241                              struct extent_buffer *parent, int parent_slot,
242                              struct extent_buffer **cow_ret,
243                              u64 search_start, u64 empty_size,
244                              u64 prealloc_dest)
245 {
246         u64 parent_start;
247         struct extent_buffer *cow;
248         u32 nritems;
249         int ret = 0;
250         int level;
251         int unlock_orig = 0;
252
253         if (*cow_ret == buf)
254                 unlock_orig = 1;
255
256         WARN_ON(!btrfs_tree_locked(buf));
257
258         if (parent)
259                 parent_start = parent->start;
260         else
261                 parent_start = 0;
262
263         WARN_ON(root->ref_cows && trans->transid !=
264                 root->fs_info->running_transaction->transid);
265         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
266
267         level = btrfs_header_level(buf);
268         nritems = btrfs_header_nritems(buf);
269
270         if (prealloc_dest) {
271                 struct btrfs_key ins;
272
273                 ins.objectid = prealloc_dest;
274                 ins.offset = buf->len;
275                 ins.type = BTRFS_EXTENT_ITEM_KEY;
276
277                 ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
278                                                   root->root_key.objectid,
279                                                   trans->transid, level, &ins);
280                 BUG_ON(ret);
281                 cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
282                                             buf->len);
283         } else {
284                 cow = btrfs_alloc_free_block(trans, root, buf->len,
285                                              parent_start,
286                                              root->root_key.objectid,
287                                              trans->transid, level,
288                                              search_start, empty_size);
289         }
290         if (IS_ERR(cow))
291                 return PTR_ERR(cow);
292
293         /* cow is set to blocking by btrfs_init_new_buffer */
294
295         copy_extent_buffer(cow, buf, 0, 0, cow->len);
296         btrfs_set_header_bytenr(cow, cow->start);
297         btrfs_set_header_generation(cow, trans->transid);
298         btrfs_set_header_owner(cow, root->root_key.objectid);
299         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
300
301         write_extent_buffer(cow, root->fs_info->fsid,
302                             (unsigned long)btrfs_header_fsid(cow),
303                             BTRFS_FSID_SIZE);
304
305         WARN_ON(btrfs_header_generation(buf) > trans->transid);
306         if (btrfs_header_generation(buf) != trans->transid) {
307                 u32 nr_extents;
308                 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
309                 if (ret)
310                         return ret;
311
312                 ret = btrfs_cache_ref(trans, root, buf, nr_extents);
313                 WARN_ON(ret);
314         } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
315                 /*
316                  * There are only two places that can drop reference to
317                  * tree blocks owned by living reloc trees, one is here,
318                  * the other place is btrfs_drop_subtree. In both places,
319                  * we check reference count while tree block is locked.
320                  * Furthermore, if reference count is one, it won't get
321                  * increased by someone else.
322                  */
323                 u32 refs;
324                 ret = btrfs_lookup_extent_ref(trans, root, buf->start,
325                                               buf->len, &refs);
326                 BUG_ON(ret);
327                 if (refs == 1) {
328                         ret = btrfs_update_ref(trans, root, buf, cow,
329                                                0, nritems);
330                         clean_tree_block(trans, root, buf);
331                 } else {
332                         ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
333                 }
334                 BUG_ON(ret);
335         } else {
336                 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
337                 if (ret)
338                         return ret;
339                 clean_tree_block(trans, root, buf);
340         }
341
342         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
343                 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
344                 WARN_ON(ret);
345         }
346
347         if (buf == root->node) {
348                 WARN_ON(parent && parent != buf);
349
350                 spin_lock(&root->node_lock);
351                 root->node = cow;
352                 extent_buffer_get(cow);
353                 spin_unlock(&root->node_lock);
354
355                 if (buf != root->commit_root) {
356                         btrfs_free_extent(trans, root, buf->start,
357                                           buf->len, buf->start,
358                                           root->root_key.objectid,
359                                           btrfs_header_generation(buf),
360                                           level, 1);
361                 }
362                 free_extent_buffer(buf);
363                 add_root_to_dirty_list(root);
364         } else {
365                 btrfs_set_node_blockptr(parent, parent_slot,
366                                         cow->start);
367                 WARN_ON(trans->transid == 0);
368                 btrfs_set_node_ptr_generation(parent, parent_slot,
369                                               trans->transid);
370                 btrfs_mark_buffer_dirty(parent);
371                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
372                 btrfs_free_extent(trans, root, buf->start, buf->len,
373                                   parent_start, btrfs_header_owner(parent),
374                                   btrfs_header_generation(parent), level, 1);
375         }
376         if (unlock_orig)
377                 btrfs_tree_unlock(buf);
378         free_extent_buffer(buf);
379         btrfs_mark_buffer_dirty(cow);
380         *cow_ret = cow;
381         return 0;
382 }
383
384 /*
385  * cows a single block, see __btrfs_cow_block for the real work.
386  * This version of it has extra checks so that a block isn't cow'd more than
387  * once per transaction, as long as it hasn't been written yet
388  */
389 noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
390                     struct btrfs_root *root, struct extent_buffer *buf,
391                     struct extent_buffer *parent, int parent_slot,
392                     struct extent_buffer **cow_ret, u64 prealloc_dest)
393 {
394         u64 search_start;
395         int ret;
396
397         if (trans->transaction != root->fs_info->running_transaction) {
398                 printk(KERN_CRIT "trans %llu running %llu\n",
399                        (unsigned long long)trans->transid,
400                        (unsigned long long)
401                        root->fs_info->running_transaction->transid);
402                 WARN_ON(1);
403         }
404         if (trans->transid != root->fs_info->generation) {
405                 printk(KERN_CRIT "trans %llu running %llu\n",
406                        (unsigned long long)trans->transid,
407                        (unsigned long long)root->fs_info->generation);
408                 WARN_ON(1);
409         }
410
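        /*
         * the block was already COWed in this transaction, is still owned by
         * this root and hasn't been written out yet, so it can be modified
         * in place
         */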
411         if (btrfs_header_generation(buf) == trans->transid &&
412             btrfs_header_owner(buf) == root->root_key.objectid &&
413             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
414                 *cow_ret = buf;
415                 WARN_ON(prealloc_dest);
416                 return 0;
417         }
418
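        /* round the allocation hint down to the containing 1GB boundary */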
419         search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
420
421         if (parent)
422                 btrfs_set_lock_blocking(parent);
423         btrfs_set_lock_blocking(buf);
424
425         ret = __btrfs_cow_block(trans, root, buf, parent,
426                                  parent_slot, cow_ret, search_start, 0,
427                                  prealloc_dest);
428         return ret;
429 }
430
431 /*
432  * helper function for defrag to decide if two blocks pointed to by a
433  * node are actually close by (their gap on disk is less than 32k)
434  */
435 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
436 {
437         if (blocknr < other && other - (blocknr + blocksize) < 32768)
438                 return 1;
439         if (blocknr > other && blocknr - (other + blocksize) < 32768)
440                 return 1;
441         return 0;
442 }
443
444 /*
445  * compare two keys in a memcmp fashion
446  */
447 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
448 {
449         struct btrfs_key k1;
450
451         btrfs_disk_key_to_cpu(&k1, disk);
452
453         if (k1.objectid > k2->objectid)
454                 return 1;
455         if (k1.objectid < k2->objectid)
456                 return -1;
457         if (k1.type > k2->type)
458                 return 1;
459         if (k1.type < k2->type)
460                 return -1;
461         if (k1.offset > k2->offset)
462                 return 1;
463         if (k1.offset < k2->offset)
464                 return -1;
465         return 0;
466 }
467
468 /*
469  * same as comp_keys only with two btrfs_key's
470  */
471 static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
472 {
473         if (k1->objectid > k2->objectid)
474                 return 1;
475         if (k1->objectid < k2->objectid)
476                 return -1;
477         if (k1->type > k2->type)
478                 return 1;
479         if (k1->type < k2->type)
480                 return -1;
481         if (k1->offset > k2->offset)
482                 return 1;
483         if (k1->offset < k2->offset)
484                 return -1;
485         return 0;
486 }
487
488 /*
489  * this is used by the defrag code to go through all the
490  * leaves pointed to by a node and reallocate them so that
491  * disk order is close to key order
492  */
493 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
494                        struct btrfs_root *root, struct extent_buffer *parent,
495                        int start_slot, int cache_only, u64 *last_ret,
496                        struct btrfs_key *progress)
497 {
498         struct extent_buffer *cur;
499         u64 blocknr;
500         u64 gen;
501         u64 search_start = *last_ret;
502         u64 last_block = 0;
503         u64 other;
504         u32 parent_nritems;
505         int end_slot;
506         int i;
507         int err = 0;
508         int parent_level;
509         int uptodate;
510         u32 blocksize;
511         int progress_passed = 0;
512         struct btrfs_disk_key disk_key;
513
514         parent_level = btrfs_header_level(parent);
515         if (cache_only && parent_level != 1)
516                 return 0;
517
518         if (trans->transaction != root->fs_info->running_transaction)
519                 WARN_ON(1);
520         if (trans->transid != root->fs_info->generation)
521                 WARN_ON(1);
522
523         parent_nritems = btrfs_header_nritems(parent);
524         blocksize = btrfs_level_size(root, parent_level - 1);
525         end_slot = parent_nritems;
526
527         if (parent_nritems == 1)
528                 return 0;
529
530         btrfs_set_lock_blocking(parent);
531
532         for (i = start_slot; i < end_slot; i++) {
533                 int close = 1;
534
535                 if (!parent->map_token) {
536                         map_extent_buffer(parent,
537                                         btrfs_node_key_ptr_offset(i),
538                                         sizeof(struct btrfs_key_ptr),
539                                         &parent->map_token, &parent->kaddr,
540                                         &parent->map_start, &parent->map_len,
541                                         KM_USER1);
542                 }
543                 btrfs_node_key(parent, &disk_key, i);
544                 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
545                         continue;
546
547                 progress_passed = 1;
548                 blocknr = btrfs_node_blockptr(parent, i);
549                 gen = btrfs_node_ptr_generation(parent, i);
550                 if (last_block == 0)
551                         last_block = blocknr;
552
553                 if (i > 0) {
554                         other = btrfs_node_blockptr(parent, i - 1);
555                         close = close_blocks(blocknr, other, blocksize);
556                 }
557                 if (!close && i < end_slot - 2) {
558                         other = btrfs_node_blockptr(parent, i + 1);
559                         close = close_blocks(blocknr, other, blocksize);
560                 }
561                 if (close) {
562                         last_block = blocknr;
563                         continue;
564                 }
565                 if (parent->map_token) {
566                         unmap_extent_buffer(parent, parent->map_token,
567                                             KM_USER1);
568                         parent->map_token = NULL;
569                 }
570
571                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
572                 if (cur)
573                         uptodate = btrfs_buffer_uptodate(cur, gen);
574                 else
575                         uptodate = 0;
576                 if (!cur || !uptodate) {
577                         if (cache_only) {
578                                 free_extent_buffer(cur);
579                                 continue;
580                         }
581                         if (!cur) {
582                                 cur = read_tree_block(root, blocknr,
583                                                          blocksize, gen);
584                         } else if (!uptodate) {
585                                 btrfs_read_buffer(cur, gen);
586                         }
587                 }
588                 if (search_start == 0)
589                         search_start = last_block;
590
591                 btrfs_tree_lock(cur);
592                 btrfs_set_lock_blocking(cur);
593                 err = __btrfs_cow_block(trans, root, cur, parent, i,
594                                         &cur, search_start,
595                                         min(16 * blocksize,
596                                             (end_slot - i) * blocksize), 0);
597                 if (err) {
598                         btrfs_tree_unlock(cur);
599                         free_extent_buffer(cur);
600                         break;
601                 }
602                 search_start = cur->start;
603                 last_block = cur->start;
604                 *last_ret = search_start;
605                 btrfs_tree_unlock(cur);
606                 free_extent_buffer(cur);
607         }
608         if (parent->map_token) {
609                 unmap_extent_buffer(parent, parent->map_token,
610                                     KM_USER1);
611                 parent->map_token = NULL;
612         }
613         return err;
614 }
615
616 /*
617  * The leaf data grows from end-to-front in the leaf.
618  * This returns the offset where the data of the last item begins,
619  * which is where the leaf's free space ends
620  */
621 static inline unsigned int leaf_data_end(struct btrfs_root *root,
622                                          struct extent_buffer *leaf)
623 {
624         u32 nr = btrfs_header_nritems(leaf);
625         if (nr == 0)
626                 return BTRFS_LEAF_DATA_SIZE(root);
627         return btrfs_item_offset_nr(leaf, nr - 1);
628 }
629
630 /*
631  * extra debugging checks to make sure all the items in a node are
632  * well formed and in the proper order
633  */
634 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
635                       int level)
636 {
637         struct extent_buffer *parent = NULL;
638         struct extent_buffer *node = path->nodes[level];
639         struct btrfs_disk_key parent_key;
640         struct btrfs_disk_key node_key;
641         int parent_slot;
642         int slot;
643         struct btrfs_key cpukey;
644         u32 nritems = btrfs_header_nritems(node);
645
646         if (path->nodes[level + 1])
647                 parent = path->nodes[level + 1];
648
649         slot = path->slots[level];
650         BUG_ON(nritems == 0);
651         if (parent) {
652                 parent_slot = path->slots[level + 1];
653                 btrfs_node_key(parent, &parent_key, parent_slot);
654                 btrfs_node_key(node, &node_key, 0);
655                 BUG_ON(memcmp(&parent_key, &node_key,
656                               sizeof(struct btrfs_disk_key)));
657                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
658                        btrfs_header_bytenr(node));
659         }
660         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
661         if (slot != 0) {
662                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
663                 btrfs_node_key(node, &node_key, slot);
664                 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
665         }
666         if (slot < nritems - 1) {
667                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
668                 btrfs_node_key(node, &node_key, slot);
669                 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
670         }
671         return 0;
672 }
673
674 /*
675  * extra checking to make sure all the items in a leaf are
676  * well formed and in the proper order
677  */
678 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
679                       int level)
680 {
681         struct extent_buffer *leaf = path->nodes[level];
682         struct extent_buffer *parent = NULL;
683         int parent_slot;
684         struct btrfs_key cpukey;
685         struct btrfs_disk_key parent_key;
686         struct btrfs_disk_key leaf_key;
687         int slot = path->slots[0];
688
689         u32 nritems = btrfs_header_nritems(leaf);
690
691         if (path->nodes[level + 1])
692                 parent = path->nodes[level + 1];
693
694         if (nritems == 0)
695                 return 0;
696
697         if (parent) {
698                 parent_slot = path->slots[level + 1];
699                 btrfs_node_key(parent, &parent_key, parent_slot);
700                 btrfs_item_key(leaf, &leaf_key, 0);
701
702                 BUG_ON(memcmp(&parent_key, &leaf_key,
703                        sizeof(struct btrfs_disk_key)));
704                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
705                        btrfs_header_bytenr(leaf));
706         }
707         if (slot != 0 && slot < nritems - 1) {
708                 btrfs_item_key(leaf, &leaf_key, slot);
709                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
710                 if (comp_keys(&leaf_key, &cpukey) <= 0) {
711                         btrfs_print_leaf(root, leaf);
712                         printk(KERN_CRIT "slot %d offset bad key\n", slot);
713                         BUG_ON(1);
714                 }
715                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
716                        btrfs_item_end_nr(leaf, slot)) {
717                         btrfs_print_leaf(root, leaf);
718                         printk(KERN_CRIT "slot %d offset bad\n", slot);
719                         BUG_ON(1);
720                 }
721         }
722         if (slot < nritems - 1) {
723                 btrfs_item_key(leaf, &leaf_key, slot);
724                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
725                 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
726                 if (btrfs_item_offset_nr(leaf, slot) !=
727                         btrfs_item_end_nr(leaf, slot + 1)) {
728                         btrfs_print_leaf(root, leaf);
729                         printk(KERN_CRIT "slot %d offset bad\n", slot);
730                         BUG_ON(1);
731                 }
732         }
733         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
734                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
735         return 0;
736 }
737
738 static noinline int check_block(struct btrfs_root *root,
739                                 struct btrfs_path *path, int level)
740 {
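        /* the sanity checks below are currently disabled by this early return */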
741         return 0;
742         if (level == 0)
743                 return check_leaf(root, path, level);
744         return check_node(root, path, level);
745 }
746
747 /*
748  * search for key in the extent_buffer.  The items start at offset p,
749  * and they are item_size apart.  There are 'max' items in p.
750  *
751  * the slot in the array is returned via slot, and it points to
752  * the place where you would insert key if it is not found in
753  * the array.
754  *
755  * slot may point to max if the key is bigger than all of the keys
756  */
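/*
 * Example (illustrative): with keys [2, 4, 6, 8], searching for 6 returns 0
 * and sets *slot to 2; searching for 5 returns 1 and sets *slot to 2, the
 * position where 5 would be inserted.
 */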
757 static noinline int generic_bin_search(struct extent_buffer *eb,
758                                        unsigned long p,
759                                        int item_size, struct btrfs_key *key,
760                                        int max, int *slot)
761 {
762         int low = 0;
763         int high = max;
764         int mid;
765         int ret;
766         struct btrfs_disk_key *tmp = NULL;
767         struct btrfs_disk_key unaligned;
768         unsigned long offset;
769         char *map_token = NULL;
770         char *kaddr = NULL;
771         unsigned long map_start = 0;
772         unsigned long map_len = 0;
773         int err;
774
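        /*
         * map_private_extent_buffer gives us a direct kernel mapping for a
         * chunk of the buffer; keep reusing it until the key we want falls
         * outside the mapped range
         */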
775         while (low < high) {
776                 mid = (low + high) / 2;
777                 offset = p + mid * item_size;
778
779                 if (!map_token || offset < map_start ||
780                     (offset + sizeof(struct btrfs_disk_key)) >
781                     map_start + map_len) {
782                         if (map_token) {
783                                 unmap_extent_buffer(eb, map_token, KM_USER0);
784                                 map_token = NULL;
785                         }
786
787                         err = map_private_extent_buffer(eb, offset,
788                                                 sizeof(struct btrfs_disk_key),
789                                                 &map_token, &kaddr,
790                                                 &map_start, &map_len, KM_USER0);
791
792                         if (!err) {
793                                 tmp = (struct btrfs_disk_key *)(kaddr + offset -
794                                                         map_start);
795                         } else {
796                                 read_extent_buffer(eb, &unaligned,
797                                                    offset, sizeof(unaligned));
798                                 tmp = &unaligned;
799                         }
800
801                 } else {
802                         tmp = (struct btrfs_disk_key *)(kaddr + offset -
803                                                         map_start);
804                 }
805                 ret = comp_keys(tmp, key);
806
807                 if (ret < 0)
808                         low = mid + 1;
809                 else if (ret > 0)
810                         high = mid;
811                 else {
812                         *slot = mid;
813                         if (map_token)
814                                 unmap_extent_buffer(eb, map_token, KM_USER0);
815                         return 0;
816                 }
817         }
818         *slot = low;
819         if (map_token)
820                 unmap_extent_buffer(eb, map_token, KM_USER0);
821         return 1;
822 }
823
824 /*
825  * simple bin_search frontend that does the right thing for
826  * leaves vs nodes
827  */
828 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
829                       int level, int *slot)
830 {
831         if (level == 0) {
832                 return generic_bin_search(eb,
833                                           offsetof(struct btrfs_leaf, items),
834                                           sizeof(struct btrfs_item),
835                                           key, btrfs_header_nritems(eb),
836                                           slot);
837         } else {
838                 return generic_bin_search(eb,
839                                           offsetof(struct btrfs_node, ptrs),
840                                           sizeof(struct btrfs_key_ptr),
841                                           key, btrfs_header_nritems(eb),
842                                           slot);
843         }
844         return -1;
845 }
846
847 /* given a node and slot number, this reads the block it points to.  The
848  * extent buffer is returned with a reference taken (but unlocked).
849  * NULL is returned on error.
850  */
851 static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
852                                    struct extent_buffer *parent, int slot)
853 {
854         int level = btrfs_header_level(parent);
855         if (slot < 0)
856                 return NULL;
857         if (slot >= btrfs_header_nritems(parent))
858                 return NULL;
859
860         BUG_ON(level == 0);
861
862         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
863                        btrfs_level_size(root, level - 1),
864                        btrfs_node_ptr_generation(parent, slot));
865 }
866
867 /*
868  * node level balancing, used to make sure nodes are in proper order for
869  * item deletion.  We balance from the top down, so we have to make sure
870  * that a deletion won't leave a node completely empty later on.
871  */
872 static noinline int balance_level(struct btrfs_trans_handle *trans,
873                          struct btrfs_root *root,
874                          struct btrfs_path *path, int level)
875 {
876         struct extent_buffer *right = NULL;
877         struct extent_buffer *mid;
878         struct extent_buffer *left = NULL;
879         struct extent_buffer *parent = NULL;
880         int ret = 0;
881         int wret;
882         int pslot;
883         int orig_slot = path->slots[level];
884         int err_on_enospc = 0;
885         u64 orig_ptr;
886
887         if (level == 0)
888                 return 0;
889
890         mid = path->nodes[level];
891
892         WARN_ON(!path->locks[level]);
893         WARN_ON(btrfs_header_generation(mid) != trans->transid);
894
895         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
896
897         if (level < BTRFS_MAX_LEVEL - 1)
898                 parent = path->nodes[level + 1];
899         pslot = parent ? path->slots[level + 1] : 0;
900
901         /*
902          * deal with the case where there is only one pointer in the root
903          * by promoting the node below to a root
904          */
905         if (!parent) {
906                 struct extent_buffer *child;
907
908                 if (btrfs_header_nritems(mid) != 1)
909                         return 0;
910
911                 /* promote the child to a root */
912                 child = read_node_slot(root, mid, 0);
913                 BUG_ON(!child);
914                 btrfs_tree_lock(child);
915                 btrfs_set_lock_blocking(child);
916                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
917                 BUG_ON(ret);
918
919                 spin_lock(&root->node_lock);
920                 root->node = child;
921                 spin_unlock(&root->node_lock);
922
923                 ret = btrfs_update_extent_ref(trans, root, child->start,
924                                               mid->start, child->start,
925                                               root->root_key.objectid,
926                                               trans->transid, level - 1);
927                 BUG_ON(ret);
928
929                 add_root_to_dirty_list(root);
930                 btrfs_tree_unlock(child);
931
932                 path->locks[level] = 0;
933                 path->nodes[level] = NULL;
934                 clean_tree_block(trans, root, mid);
935                 btrfs_tree_unlock(mid);
936                 /* once for the path */
937                 free_extent_buffer(mid);
938                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
939                                         mid->start, root->root_key.objectid,
940                                         btrfs_header_generation(mid),
941                                         level, 1);
942                 /* once for the root ptr */
943                 free_extent_buffer(mid);
944                 return ret;
945         }
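        /* nothing more to do if the node is over a quarter full */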
946         if (btrfs_header_nritems(mid) >
947             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
948                 return 0;
949
950         if (btrfs_header_nritems(mid) < 2)
951                 err_on_enospc = 1;
952
953         left = read_node_slot(root, parent, pslot - 1);
954         if (left) {
955                 btrfs_tree_lock(left);
956                 btrfs_set_lock_blocking(left);
957                 wret = btrfs_cow_block(trans, root, left,
958                                        parent, pslot - 1, &left, 0);
959                 if (wret) {
960                         ret = wret;
961                         goto enospc;
962                 }
963         }
964         right = read_node_slot(root, parent, pslot + 1);
965         if (right) {
966                 btrfs_tree_lock(right);
967                 btrfs_set_lock_blocking(right);
968                 wret = btrfs_cow_block(trans, root, right,
969                                        parent, pslot + 1, &right, 0);
970                 if (wret) {
971                         ret = wret;
972                         goto enospc;
973                 }
974         }
975
976         /* first, try to make some room in the middle buffer */
977         if (left) {
978                 orig_slot += btrfs_header_nritems(left);
979                 wret = push_node_left(trans, root, left, mid, 1);
980                 if (wret < 0)
981                         ret = wret;
982                 if (btrfs_header_nritems(mid) < 2)
983                         err_on_enospc = 1;
984         }
985
986         /*
987          * then try to empty the right most buffer into the middle
988          */
989         if (right) {
990                 wret = push_node_left(trans, root, mid, right, 1);
991                 if (wret < 0 && wret != -ENOSPC)
992                         ret = wret;
993                 if (btrfs_header_nritems(right) == 0) {
994                         u64 bytenr = right->start;
995                         u64 generation = btrfs_header_generation(parent);
996                         u32 blocksize = right->len;
997
998                         clean_tree_block(trans, root, right);
999                         btrfs_tree_unlock(right);
1000                         free_extent_buffer(right);
1001                         right = NULL;
1002                         wret = del_ptr(trans, root, path, level + 1, pslot +
1003                                        1);
1004                         if (wret)
1005                                 ret = wret;
1006                         wret = btrfs_free_extent(trans, root, bytenr,
1007                                                  blocksize, parent->start,
1008                                                  btrfs_header_owner(parent),
1009                                                  generation, level, 1);
1010                         if (wret)
1011                                 ret = wret;
1012                 } else {
1013                         struct btrfs_disk_key right_key;
1014                         btrfs_node_key(right, &right_key, 0);
1015                         btrfs_set_node_key(parent, &right_key, pslot + 1);
1016                         btrfs_mark_buffer_dirty(parent);
1017                 }
1018         }
1019         if (btrfs_header_nritems(mid) == 1) {
1020                 /*
1021                  * we're not allowed to leave a node with one item in the
1022                  * tree during a delete.  A deletion from lower in the tree
1023                  * could try to delete the only pointer in this node.
1024                  * So, pull some keys from the left.
1025                  * There has to be a left pointer at this point because
1026                  * otherwise we would have pulled some pointers from the
1027                  * right
1028                  */
1029                 BUG_ON(!left);
1030                 wret = balance_node_right(trans, root, mid, left);
1031                 if (wret < 0) {
1032                         ret = wret;
1033                         goto enospc;
1034                 }
1035                 if (wret == 1) {
1036                         wret = push_node_left(trans, root, left, mid, 1);
1037                         if (wret < 0)
1038                                 ret = wret;
1039                 }
1040                 BUG_ON(wret == 1);
1041         }
1042         if (btrfs_header_nritems(mid) == 0) {
1043                 /* we've managed to empty the middle node, drop it */
1044                 u64 root_gen = btrfs_header_generation(parent);
1045                 u64 bytenr = mid->start;
1046                 u32 blocksize = mid->len;
1047
1048                 clean_tree_block(trans, root, mid);
1049                 btrfs_tree_unlock(mid);
1050                 free_extent_buffer(mid);
1051                 mid = NULL;
1052                 wret = del_ptr(trans, root, path, level + 1, pslot);
1053                 if (wret)
1054                         ret = wret;
1055                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
1056                                          parent->start,
1057                                          btrfs_header_owner(parent),
1058                                          root_gen, level, 1);
1059                 if (wret)
1060                         ret = wret;
1061         } else {
1062                 /* update the parent key to reflect our changes */
1063                 struct btrfs_disk_key mid_key;
1064                 btrfs_node_key(mid, &mid_key, 0);
1065                 btrfs_set_node_key(parent, &mid_key, pslot);
1066                 btrfs_mark_buffer_dirty(parent);
1067         }
1068
1069         /* update the path */
1070         if (left) {
1071                 if (btrfs_header_nritems(left) > orig_slot) {
1072                         extent_buffer_get(left);
1073                         /* left was locked after cow */
1074                         path->nodes[level] = left;
1075                         path->slots[level + 1] -= 1;
1076                         path->slots[level] = orig_slot;
1077                         if (mid) {
1078                                 btrfs_tree_unlock(mid);
1079                                 free_extent_buffer(mid);
1080                         }
1081                 } else {
1082                         orig_slot -= btrfs_header_nritems(left);
1083                         path->slots[level] = orig_slot;
1084                 }
1085         }
1086         /* double check we haven't messed things up */
1087         check_block(root, path, level);
1088         if (orig_ptr !=
1089             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1090                 BUG();
1091 enospc:
1092         if (right) {
1093                 btrfs_tree_unlock(right);
1094                 free_extent_buffer(right);
1095         }
1096         if (left) {
1097                 if (path->nodes[level] != left)
1098                         btrfs_tree_unlock(left);
1099                 free_extent_buffer(left);
1100         }
1101         return ret;
1102 }
1103
1104 /* Node balancing for insertion.  Here we only split or push nodes around
1105  * when they are completely full.  This is also done top down, so we
1106  * have to be pessimistic.
1107  */
1108 static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1109                                           struct btrfs_root *root,
1110                                           struct btrfs_path *path, int level)
1111 {
1112         struct extent_buffer *right = NULL;
1113         struct extent_buffer *mid;
1114         struct extent_buffer *left = NULL;
1115         struct extent_buffer *parent = NULL;
1116         int ret = 0;
1117         int wret;
1118         int pslot;
1119         int orig_slot = path->slots[level];
1120         u64 orig_ptr;
1121
1122         if (level == 0)
1123                 return 1;
1124
1125         mid = path->nodes[level];
1126         WARN_ON(btrfs_header_generation(mid) != trans->transid);
1127         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
1128
1129         if (level < BTRFS_MAX_LEVEL - 1)
1130                 parent = path->nodes[level + 1];
1131         pslot = parent ? path->slots[level + 1] : 0;
1132
1133         if (!parent)
1134                 return 1;
1135
1136         left = read_node_slot(root, parent, pslot - 1);
1137
1138         /* first, try to make some room in the middle buffer */
1139         if (left) {
1140                 u32 left_nr;
1141
1142                 btrfs_tree_lock(left);
1143                 btrfs_set_lock_blocking(left);
1144
1145                 left_nr = btrfs_header_nritems(left);
1146                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1147                         wret = 1;
1148                 } else {
1149                         ret = btrfs_cow_block(trans, root, left, parent,
1150                                               pslot - 1, &left, 0);
1151                         if (ret)
1152                                 wret = 1;
1153                         else {
1154                                 wret = push_node_left(trans, root,
1155                                                       left, mid, 0);
1156                         }
1157                 }
1158                 if (wret < 0)
1159                         ret = wret;
1160                 if (wret == 0) {
1161                         struct btrfs_disk_key disk_key;
1162                         orig_slot += left_nr;
1163                         btrfs_node_key(mid, &disk_key, 0);
1164                         btrfs_set_node_key(parent, &disk_key, pslot);
1165                         btrfs_mark_buffer_dirty(parent);
1166                         if (btrfs_header_nritems(left) > orig_slot) {
1167                                 path->nodes[level] = left;
1168                                 path->slots[level + 1] -= 1;
1169                                 path->slots[level] = orig_slot;
1170                                 btrfs_tree_unlock(mid);
1171                                 free_extent_buffer(mid);
1172                         } else {
1173                                 orig_slot -=
1174                                         btrfs_header_nritems(left);
1175                                 path->slots[level] = orig_slot;
1176                                 btrfs_tree_unlock(left);
1177                                 free_extent_buffer(left);
1178                         }
1179                         return 0;
1180                 }
1181                 btrfs_tree_unlock(left);
1182                 free_extent_buffer(left);
1183         }
1184         right = read_node_slot(root, parent, pslot + 1);
1185
1186         /*
1187          * then try to empty the right most buffer into the middle
1188          */
1189         if (right) {
1190                 u32 right_nr;
1191
1192                 btrfs_tree_lock(right);
1193                 btrfs_set_lock_blocking(right);
1194
1195                 right_nr = btrfs_header_nritems(right);
1196                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1197                         wret = 1;
1198                 } else {
1199                         ret = btrfs_cow_block(trans, root, right,
1200                                               parent, pslot + 1,
1201                                               &right, 0);
1202                         if (ret)
1203                                 wret = 1;
1204                         else {
1205                                 wret = balance_node_right(trans, root,
1206                                                           right, mid);
1207                         }
1208                 }
1209                 if (wret < 0)
1210                         ret = wret;
1211                 if (wret == 0) {
1212                         struct btrfs_disk_key disk_key;
1213
1214                         btrfs_node_key(right, &disk_key, 0);
1215                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
1216                         btrfs_mark_buffer_dirty(parent);
1217
1218                         if (btrfs_header_nritems(mid) <= orig_slot) {
1219                                 path->nodes[level] = right;
1220                                 path->slots[level + 1] += 1;
1221                                 path->slots[level] = orig_slot -
1222                                         btrfs_header_nritems(mid);
1223                                 btrfs_tree_unlock(mid);
1224                                 free_extent_buffer(mid);
1225                         } else {
1226                                 btrfs_tree_unlock(right);
1227                                 free_extent_buffer(right);
1228                         }
1229                         return 0;
1230                 }
1231                 btrfs_tree_unlock(right);
1232                 free_extent_buffer(right);
1233         }
1234         return 1;
1235 }
1236
1237 /*
1238  * readahead one full node of leaves, finding things that are close
1239  * to the block in 'slot', and triggering ra on them.
1240  */
1241 static noinline void reada_for_search(struct btrfs_root *root,
1242                                       struct btrfs_path *path,
1243                                       int level, int slot, u64 objectid)
1244 {
1245         struct extent_buffer *node;
1246         struct btrfs_disk_key disk_key;
1247         u32 nritems;
1248         u64 search;
1249         u64 target;
1250         u64 nread = 0;
1251         int direction = path->reada;
1252         struct extent_buffer *eb;
1253         u32 nr;
1254         u32 blocksize;
1255         u32 nscan = 0;
1256
1257         if (level != 1)
1258                 return;
1259
1260         if (!path->nodes[level])
1261                 return;
1262
1263         node = path->nodes[level];
1264
1265         search = btrfs_node_blockptr(node, slot);
1266         blocksize = btrfs_level_size(root, level - 1);
1267         eb = btrfs_find_tree_block(root, search, blocksize);
1268         if (eb) {
1269                 free_extent_buffer(eb);
1270                 return;
1271         }
1272
1273         target = search;
1274
1275         nritems = btrfs_header_nritems(node);
1276         nr = slot;
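        /*
         * walk forward or backward from the target slot (depending on
         * path->reada), reading ahead neighbours that sit within 64k of the
         * target block; stop after roughly 64k of readahead or 32 slots
         * scanned
         */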
1277         while (1) {
1278                 if (direction < 0) {
1279                         if (nr == 0)
1280                                 break;
1281                         nr--;
1282                 } else if (direction > 0) {
1283                         nr++;
1284                         if (nr >= nritems)
1285                                 break;
1286                 }
1287                 if (path->reada < 0 && objectid) {
1288                         btrfs_node_key(node, &disk_key, nr);
1289                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1290                                 break;
1291                 }
1292                 search = btrfs_node_blockptr(node, nr);
1293                 if ((search <= target && target - search <= 65536) ||
1294                     (search > target && search - target <= 65536)) {
1295                         readahead_tree_block(root, search, blocksize,
1296                                      btrfs_node_ptr_generation(node, nr));
1297                         nread += blocksize;
1298                 }
1299                 nscan++;
1300                 if ((nread > 65536 || nscan > 32))
1301                         break;
1302         }
1303 }
1304
1305 /*
1306  * returns -EAGAIN if it had to drop the path, or zero if everything was in
1307  * cache
1308  */
1309 static noinline int reada_for_balance(struct btrfs_root *root,
1310                                       struct btrfs_path *path, int level)
1311 {
1312         int slot;
1313         int nritems;
1314         struct extent_buffer *parent;
1315         struct extent_buffer *eb;
1316         u64 gen;
1317         u64 block1 = 0;
1318         u64 block2 = 0;
1319         int ret = 0;
1320         int blocksize;
1321
1322         parent = path->nodes[level - 1];
1323         if (!parent)
1324                 return 0;
1325
1326         nritems = btrfs_header_nritems(parent);
1327         slot = path->slots[level];
1328         blocksize = btrfs_level_size(root, level);
1329
1330         if (slot > 0) {
1331                 block1 = btrfs_node_blockptr(parent, slot - 1);
1332                 gen = btrfs_node_ptr_generation(parent, slot - 1);
1333                 eb = btrfs_find_tree_block(root, block1, blocksize);
1334                 if (eb && btrfs_buffer_uptodate(eb, gen))
1335                         block1 = 0;
1336                 free_extent_buffer(eb);
1337         }
1338         if (slot + 1 < nritems) {
1339                 block2 = btrfs_node_blockptr(parent, slot + 1);
1340                 gen = btrfs_node_ptr_generation(parent, slot + 1);
1341                 eb = btrfs_find_tree_block(root, block2, blocksize);
1342                 if (eb && btrfs_buffer_uptodate(eb, gen))
1343                         block2 = 0;
1344                 free_extent_buffer(eb);
1345         }
1346         if (block1 || block2) {
1347                 ret = -EAGAIN;
1348                 btrfs_release_path(root, path);
1349                 if (block1)
1350                         readahead_tree_block(root, block1, blocksize, 0);
1351                 if (block2)
1352                         readahead_tree_block(root, block2, blocksize, 0);
1353
1354                 if (block1) {
1355                         eb = read_tree_block(root, block1, blocksize, 0);
1356                         free_extent_buffer(eb);
1357                 }
1358                 if (block2) {
1359                         eb = read_tree_block(root, block2, blocksize, 0);
1360                         free_extent_buffer(eb);
1361                 }
1362         }
1363         return ret;
1364 }
1365
1366
1367 /*
1368  * when we walk down the tree, it is usually safe to unlock the higher layers
1369  * in the tree.  The exceptions are when our path goes through slot 0, because
1370  * operations on the tree might require changing key pointers higher up in the
1371  * tree.
1372  *
1373  * callers might also have set path->keep_locks, which tells this code to keep
1374  * the lock if the path points to the last slot in the block.  This is part of
1375  * walking through the tree, and selecting the next slot in the higher block.
1376  *
1377  * lowest_unlock sets the lowest level in the tree we're allowed to unlock.  so
1378  * if lowest_unlock is 1, level 0 won't be unlocked
1379  */
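/*
 * A worked example of the rules above (illustrative only, with made-up
 * slot numbers): suppose unlock_up(path, 1, 1) is called on a path with
 * locked nodes at levels 1-3 and slots[1] == 0, slots[2] == 3,
 * slots[3] == 1.  Because level 1 sits in slot 0, a key update there
 * could ripple into level 2, so levels 1 and 2 keep their locks; level 3
 * is outside the protected range and at or above lowest_unlock, so its
 * lock is dropped.
 */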
1380 static noinline void unlock_up(struct btrfs_path *path, int level,
1381                                int lowest_unlock)
1382 {
1383         int i;
1384         int skip_level = level;
1385         int no_skips = 0;
1386         struct extent_buffer *t;
1387
1388         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1389                 if (!path->nodes[i])
1390                         break;
1391                 if (!path->locks[i])
1392                         break;
1393                 if (!no_skips && path->slots[i] == 0) {
1394                         skip_level = i + 1;
1395                         continue;
1396                 }
1397                 if (!no_skips && path->keep_locks) {
1398                         u32 nritems;
1399                         t = path->nodes[i];
1400                         nritems = btrfs_header_nritems(t);
1401                         if (nritems < 1 || path->slots[i] >= nritems - 1) {
1402                                 skip_level = i + 1;
1403                                 continue;
1404                         }
1405                 }
1406                 if (skip_level < i && i >= lowest_unlock)
1407                         no_skips = 1;
1408
1409                 t = path->nodes[i];
1410                 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
1411                         btrfs_tree_unlock(t);
1412                         path->locks[i] = 0;
1413                 }
1414         }
1415 }
1416
1417 /*
1418  * This releases any locks held in the path starting at level and
1419  * going all the way up to the root.
1420  *
1421  * btrfs_search_slot will keep the lock held on higher nodes in a few
1422  * corner cases, such as COW of the block at slot zero in the node.  This
1423  * ignores those rules, and it should only be called when there are no
1424  * more updates to be done higher up in the tree.
1425  */
1426 noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
1427 {
1428         int i;
1429
1430         if (path->keep_locks || path->lowest_level)
1431                 return;
1432
1433         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1434                 if (!path->nodes[i])
1435                         continue;
1436                 if (!path->locks[i])
1437                         continue;
1438                 btrfs_tree_unlock(path->nodes[i]);
1439                 path->locks[i] = 0;
1440         }
1441 }
1442
1443 /*
1444  * look for key in the tree.  path is filled in with nodes along the way
1445  * if key is found, we return zero and you can find the item in the leaf
1446  * level of the path (level 0)
1447  *
1448  * If the key isn't found, the path points to the slot where it should
1449  * be inserted, and 1 is returned.  If there are other errors during the
1450  * search a negative error number is returned.
1451  *
1452  * if ins_len > 0, nodes and leaves will be split as we walk down the
1453  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1454  * possible)
1455  */
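/*
 * A minimal read-only caller sketch (illustrative only; the key values are
 * made up and error handling is trimmed):
 *
 *	struct btrfs_path *path = btrfs_alloc_path();
 *	struct btrfs_key key;
 *	int ret;
 *
 *	key.objectid = 256;
 *	key.type = BTRFS_INODE_ITEM_KEY;
 *	key.offset = 0;
 *
 *	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 *	if (ret == 0) {
 *		item found at path->nodes[0], slot path->slots[0]
 *	} else if (ret > 0) {
 *		not found; path->slots[0] is the insertion position
 *	}
 *	btrfs_free_path(path);
 *
 * For inserts, pass a transaction handle, ins_len > 0 and cow == 1 so the
 * blocks on the path are split and COWed as needed on the way down.
 */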
1456 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1457                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1458                       ins_len, int cow)
1459 {
1460         struct extent_buffer *b;
1461         struct extent_buffer *tmp;
1462         int slot;
1463         int ret;
1464         int level;
1465         int should_reada = p->reada;
1466         int lowest_unlock = 1;
1467         int blocksize;
1468         u8 lowest_level = 0;
1469         u64 blocknr;
1470         u64 gen;
1471         struct btrfs_key prealloc_block;
1472
1473         lowest_level = p->lowest_level;
1474         WARN_ON(lowest_level && ins_len > 0);
1475         WARN_ON(p->nodes[0] != NULL);
1476
1477         if (ins_len < 0)
1478                 lowest_unlock = 2;
1479
1480         prealloc_block.objectid = 0;
1481
1482 again:
1483         if (p->skip_locking)
1484                 b = btrfs_root_node(root);
1485         else
1486                 b = btrfs_lock_root_node(root);
1487
1488         while (b) {
1489                 level = btrfs_header_level(b);
1490
1491                 /*
1492                  * setup the path here so we can release it under lock
1493                  * contention with the cow code
1494                  */
1495                 p->nodes[level] = b;
1496                 if (!p->skip_locking)
1497                         p->locks[level] = 1;
1498
1499                 if (cow) {
1500                         int wret;
1501
1502                         /* can we skip cowing this block? */
1503                         if (btrfs_header_generation(b) == trans->transid &&
1504                             btrfs_header_owner(b) == root->root_key.objectid &&
1505                             !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
1506                                 goto cow_done;
1507                         }
1508
1509                         /* ok, we have to cow, is our old prealloc the right
1510                          * size?
1511                          */
1512                         if (prealloc_block.objectid &&
1513                             prealloc_block.offset != b->len) {
1514                                 btrfs_release_path(root, p);
1515                                 btrfs_free_reserved_extent(root,
1516                                            prealloc_block.objectid,
1517                                            prealloc_block.offset);
1518                                 prealloc_block.objectid = 0;
1519                                 goto again;
1520                         }
1521
1522                         /*
1523                          * for higher level blocks, try not to allocate blocks
1524                          * with the block and the parent locks held.
1525                          */
1526                         if (level > 0 && !prealloc_block.objectid) {
1527                                 u32 size = b->len;
1528                                 u64 hint = b->start;
1529
1530                                 btrfs_release_path(root, p);
1531                                 ret = btrfs_reserve_extent(trans, root,
1532                                                            size, size, 0,
1533                                                            hint, (u64)-1,
1534                                                            &prealloc_block, 0);
1535                                 BUG_ON(ret);
1536                                 goto again;
1537                         }
1538
1539                         btrfs_set_path_blocking(p);
1540
1541                         wret = btrfs_cow_block(trans, root, b,
1542                                                p->nodes[level + 1],
1543                                                p->slots[level + 1],
1544                                                &b, prealloc_block.objectid);
1545                         prealloc_block.objectid = 0;
1546                         if (wret) {
1547                                 free_extent_buffer(b);
1548                                 ret = wret;
1549                                 goto done;
1550                         }
1551                 }
1552 cow_done:
1553                 BUG_ON(!cow && ins_len);
1554                 if (level != btrfs_header_level(b))
1555                         WARN_ON(1);
1556                 level = btrfs_header_level(b);
1557
1558                 p->nodes[level] = b;
1559                 if (!p->skip_locking)
1560                         p->locks[level] = 1;
1561
1562                 btrfs_clear_path_blocking(p);
1563
1564                 /*
1565                  * we have a lock on b and as long as we aren't changing
1566                  * the tree, there is no way for the items in b to change.
1567                  * It is safe to drop the lock on our parent before we
1568                  * go through the expensive btree search on b.
1569                  *
1570                  * If cow is true, then we might be changing slot zero,
1571                  * which may require changing the parent.  So, we can't
1572                  * drop the lock until after we know which slot we're
1573                  * operating on.
1574                  */
1575                 if (!cow)
1576                         btrfs_unlock_up_safe(p, level + 1);
1577
1578                 ret = check_block(root, p, level);
1579                 if (ret) {
1580                         ret = -1;
1581                         goto done;
1582                 }
1583
1584                 ret = bin_search(b, key, level, &slot);
1585
1586                 if (level != 0) {
1587                         if (ret && slot > 0)
1588                                 slot -= 1;
1589                         p->slots[level] = slot;
1590                         if ((p->search_for_split || ins_len > 0) &&
1591                             btrfs_header_nritems(b) >=
1592                             BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1593                                 int sret;
1594
1595                                 sret = reada_for_balance(root, p, level);
1596                                 if (sret)
1597                                         goto again;
1598
1599                                 btrfs_set_path_blocking(p);
1600                                 sret = split_node(trans, root, p, level);
1601                                 btrfs_clear_path_blocking(p);
1602
1603                                 BUG_ON(sret > 0);
1604                                 if (sret) {
1605                                         ret = sret;
1606                                         goto done;
1607                                 }
1608                                 b = p->nodes[level];
1609                                 slot = p->slots[level];
1610                         } else if (ins_len < 0 &&
1611                                    btrfs_header_nritems(b) <
1612                                    BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
1613                                 int sret;
1614
1615                                 sret = reada_for_balance(root, p, level);
1616                                 if (sret)
1617                                         goto again;
1618
1619                                 btrfs_set_path_blocking(p);
1620                                 sret = balance_level(trans, root, p, level);
1621                                 btrfs_clear_path_blocking(p);
1622
1623                                 if (sret) {
1624                                         ret = sret;
1625                                         goto done;
1626                                 }
1627                                 b = p->nodes[level];
1628                                 if (!b) {
1629                                         btrfs_release_path(NULL, p);
1630                                         goto again;
1631                                 }
1632                                 slot = p->slots[level];
1633                                 BUG_ON(btrfs_header_nritems(b) == 1);
1634                         }
1635                         unlock_up(p, level, lowest_unlock);
1636
1637                         /* this is only true while dropping a snapshot */
1638                         if (level == lowest_level) {
1639                                 ret = 0;
1640                                 goto done;
1641                         }
1642
1643                         blocknr = btrfs_node_blockptr(b, slot);
1644                         gen = btrfs_node_ptr_generation(b, slot);
1645                         blocksize = btrfs_level_size(root, level - 1);
1646
1647                         tmp = btrfs_find_tree_block(root, blocknr, blocksize);
1648                         if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1649                                 b = tmp;
1650                         } else {
1651                                 /*
1652                                  * reduce lock contention at high levels
1653                                  * of the btree by dropping locks before
1654                                  * we read.
1655                                  */
1656                                 if (level > 0) {
1657                                         btrfs_release_path(NULL, p);
1658                                         if (tmp)
1659                                                 free_extent_buffer(tmp);
1660                                         if (should_reada)
1661                                                 reada_for_search(root, p,
1662                                                                  level, slot,
1663                                                                  key->objectid);
1664
1665                                         tmp = read_tree_block(root, blocknr,
1666                                                          blocksize, gen);
1667                                         if (tmp)
1668                                                 free_extent_buffer(tmp);
1669                                         goto again;
1670                                 } else {
1671                                         btrfs_set_path_blocking(p);
1672                                         if (tmp)
1673                                                 free_extent_buffer(tmp);
1674                                         if (should_reada)
1675                                                 reada_for_search(root, p,
1676                                                                  level, slot,
1677                                                                  key->objectid);
1678                                         b = read_node_slot(root, b, slot);
1679                                 }
1680                         }
1681                         if (!p->skip_locking) {
1682                                 int lret;
1683
1684                                 btrfs_clear_path_blocking(p);
1685                                 lret = btrfs_try_spin_lock(b);
1686
1687                                 if (!lret) {
1688                                         btrfs_set_path_blocking(p);
1689                                         btrfs_tree_lock(b);
1690                                         btrfs_clear_path_blocking(p);
1691                                 }
1692                         }
1693                 } else {
1694                         p->slots[level] = slot;
1695                         if (ins_len > 0 &&
1696                             btrfs_leaf_free_space(root, b) < ins_len) {
1697                                 int sret;
1698
1699                                 btrfs_set_path_blocking(p);
1700                                 sret = split_leaf(trans, root, key,
1701                                                       p, ins_len, ret == 0);
1702                                 btrfs_clear_path_blocking(p);
1703
1704                                 BUG_ON(sret > 0);
1705                                 if (sret) {
1706                                         ret = sret;
1707                                         goto done;
1708                                 }
1709                         }
1710                         if (!p->search_for_split)
1711                                 unlock_up(p, level, lowest_unlock);
1712                         goto done;
1713                 }
1714         }
1715         ret = 1;
1716 done:
1717         /*
1718          * we don't really know what they plan on doing with the path
1719          * from here on, so for now just mark it as blocking
1720          */
1721         btrfs_set_path_blocking(p);
1722         if (prealloc_block.objectid) {
1723                 btrfs_free_reserved_extent(root,
1724                            prealloc_block.objectid,
1725                            prealloc_block.offset);
1726         }
1727         return ret;
1728 }
1729
1730 int btrfs_merge_path(struct btrfs_trans_handle *trans,
1731                      struct btrfs_root *root,
1732                      struct btrfs_key *node_keys,
1733                      u64 *nodes, int lowest_level)
1734 {
1735         struct extent_buffer *eb;
1736         struct extent_buffer *parent;
1737         struct btrfs_key key;
1738         u64 bytenr;
1739         u64 generation;
1740         u32 blocksize;
1741         int level;
1742         int slot;
1743         int key_match;
1744         int ret;
1745
1746         eb = btrfs_lock_root_node(root);
1747         ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
1748         BUG_ON(ret);
1749
1750         btrfs_set_lock_blocking(eb);
1751
1752         parent = eb;
1753         while (1) {
1754                 level = btrfs_header_level(parent);
1755                 if (level == 0 || level <= lowest_level)
1756                         break;
1757
1758                 ret = bin_search(parent, &node_keys[lowest_level], level,
1759                                  &slot);
1760                 if (ret && slot > 0)
1761                         slot--;
1762
1763                 bytenr = btrfs_node_blockptr(parent, slot);
1764                 if (nodes[level - 1] == bytenr)
1765                         break;
1766
1767                 blocksize = btrfs_level_size(root, level - 1);
1768                 generation = btrfs_node_ptr_generation(parent, slot);
1769                 btrfs_node_key_to_cpu(eb, &key, slot);
1770                 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1771
1772                 if (generation == trans->transid) {
1773                         eb = read_tree_block(root, bytenr, blocksize,
1774                                              generation);
1775                         btrfs_tree_lock(eb);
1776                         btrfs_set_lock_blocking(eb);
1777                 }
1778
1779                 /*
1780                  * if node keys match and node pointer hasn't been modified
1781                  * in the running transaction, we can merge the path. for
1782                  * blocks owned by reloc trees, the node pointer check is
1783                  * skipped; this is because these blocks are fully controlled
1784                  * by the space balance code and no one else can modify them.
1785                  */
1786                 if (!nodes[level - 1] || !key_match ||
1787                     (generation == trans->transid &&
1788                      btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
1789                         if (level == 1 || level == lowest_level + 1) {
1790                                 if (generation == trans->transid) {
1791                                         btrfs_tree_unlock(eb);
1792                                         free_extent_buffer(eb);
1793                                 }
1794                                 break;
1795                         }
1796
1797                         if (generation != trans->transid) {
1798                                 eb = read_tree_block(root, bytenr, blocksize,
1799                                                 generation);
1800                                 btrfs_tree_lock(eb);
1801                                 btrfs_set_lock_blocking(eb);
1802                         }
1803
1804                         ret = btrfs_cow_block(trans, root, eb, parent, slot,
1805                                               &eb, 0);
1806                         BUG_ON(ret);
1807
1808                         if (root->root_key.objectid ==
1809                             BTRFS_TREE_RELOC_OBJECTID) {
1810                                 if (!nodes[level - 1]) {
1811                                         nodes[level - 1] = eb->start;
1812                                         memcpy(&node_keys[level - 1], &key,
1813                                                sizeof(node_keys[0]));
1814                                 } else {
1815                                         WARN_ON(1);
1816                                 }
1817                         }
1818
1819                         btrfs_tree_unlock(parent);
1820                         free_extent_buffer(parent);
1821                         parent = eb;
1822                         continue;
1823                 }
1824
1825                 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1826                 btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1827                 btrfs_mark_buffer_dirty(parent);
1828
1829                 ret = btrfs_inc_extent_ref(trans, root,
1830                                         nodes[level - 1],
1831                                         blocksize, parent->start,
1832                                         btrfs_header_owner(parent),
1833                                         btrfs_header_generation(parent),
1834                                         level - 1);
1835                 BUG_ON(ret);
1836
1837                 /*
1838                  * If the block was created in the running transaction,
1839                  * it's possible this is the last reference to it, so we
1840                  * should drop the subtree.
1841                  */
1842                 if (generation == trans->transid) {
1843                         ret = btrfs_drop_subtree(trans, root, eb, parent);
1844                         BUG_ON(ret);
1845                         btrfs_tree_unlock(eb);
1846                         free_extent_buffer(eb);
1847                 } else {
1848                         ret = btrfs_free_extent(trans, root, bytenr,
1849                                         blocksize, parent->start,
1850                                         btrfs_header_owner(parent),
1851                                         btrfs_header_generation(parent),
1852                                         level - 1, 1);
1853                         BUG_ON(ret);
1854                 }
1855                 break;
1856         }
1857         btrfs_tree_unlock(parent);
1858         free_extent_buffer(parent);
1859         return 0;
1860 }
1861
1862 /*
1863  * adjust the pointers going up the tree, starting at level
1864  * making sure the right key of each node points to 'key'.
1865  * This is used after shifting pointers to the left, so it stops
1866  * fixing up pointers when a given leaf/node is not in slot 0 of the
1867  * higher levels
1868  *
1869  * If this fails to write a tree block, it returns -1, but continues
1870  * fixing up the blocks in ram so the tree is consistent.
1871  */
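/*
 * Illustrative example (made-up levels): if the smallest key in a leaf
 * changes (slot 0), the key pointer for that leaf in its parent node must
 * be rewritten to match.  If the leaf itself sits in slot 0 of the parent,
 * the fix ripples one level higher, and so on; the walk stops at the first
 * level whose slot is non-zero.
 */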
1872 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1873                           struct btrfs_root *root, struct btrfs_path *path,
1874                           struct btrfs_disk_key *key, int level)
1875 {
1876         int i;
1877         int ret = 0;
1878         struct extent_buffer *t;
1879
1880         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1881                 int tslot = path->slots[i];
1882                 if (!path->nodes[i])
1883                         break;
1884                 t = path->nodes[i];
1885                 btrfs_set_node_key(t, key, tslot);
1886                 btrfs_mark_buffer_dirty(path->nodes[i]);
1887                 if (tslot != 0)
1888                         break;
1889         }
1890         return ret;
1891 }
1892
1893 /*
1894  * update item key.
1895  *
1896  * This function isn't completely safe. It's the caller's responsibility
1897  * that the new key won't break the order
1898  */
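/*
 * Example of the ordering rule above (illustrative key values): with
 * neighbour keys (256, DIR_ITEM, 10) at slot - 1 and (256, DIR_ITEM, 40)
 * at slot + 1, a new_key of (256, DIR_ITEM, 20) is accepted, while
 * (256, DIR_ITEM, 10) or (256, DIR_ITEM, 50) is rejected with -1 because
 * it would no longer sort strictly between its neighbours.
 */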
1899 int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1900                             struct btrfs_root *root, struct btrfs_path *path,
1901                             struct btrfs_key *new_key)
1902 {
1903         struct btrfs_disk_key disk_key;
1904         struct extent_buffer *eb;
1905         int slot;
1906
1907         eb = path->nodes[0];
1908         slot = path->slots[0];
1909         if (slot > 0) {
1910                 btrfs_item_key(eb, &disk_key, slot - 1);
1911                 if (comp_keys(&disk_key, new_key) >= 0)
1912                         return -1;
1913         }
1914         if (slot < btrfs_header_nritems(eb) - 1) {
1915                 btrfs_item_key(eb, &disk_key, slot + 1);
1916                 if (comp_keys(&disk_key, new_key) <= 0)
1917                         return -1;
1918         }
1919
1920         btrfs_cpu_key_to_disk(&disk_key, new_key);
1921         btrfs_set_item_key(eb, &disk_key, slot);
1922         btrfs_mark_buffer_dirty(eb);
1923         if (slot == 0)
1924                 fixup_low_keys(trans, root, path, &disk_key, 1);
1925         return 0;
1926 }
1927
1928 /*
1929  * try to push data from one node into the next node left in the
1930  * tree.
1931  *
1932  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1933  * error, and > 0 if there was no room in the left hand block.
1934  */
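/*
 * A worked example of the sizing rules below (numbers are illustrative):
 * with empty == 0, a source node holding 20 pointers and room for 30 in
 * the destination, push_items starts at 30 but is clamped to
 * min(20 - 8, 30) == 12, so at least 8 pointers are always left behind
 * in a node we are not trying to empty completely.
 */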
1935 static int push_node_left(struct btrfs_trans_handle *trans,
1936                           struct btrfs_root *root, struct extent_buffer *dst,
1937                           struct extent_buffer *src, int empty)
1938 {
1939         int push_items = 0;
1940         int src_nritems;
1941         int dst_nritems;
1942         int ret = 0;
1943
1944         src_nritems = btrfs_header_nritems(src);
1945         dst_nritems = btrfs_header_nritems(dst);
1946         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1947         WARN_ON(btrfs_header_generation(src) != trans->transid);
1948         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1949
1950         if (!empty && src_nritems <= 8)
1951                 return 1;
1952
1953         if (push_items <= 0)
1954                 return 1;
1955
1956         if (empty) {
1957                 push_items = min(src_nritems, push_items);
1958                 if (push_items < src_nritems) {
1959                         /* leave at least 8 pointers in the node if
1960                          * we aren't going to empty it
1961                          */
1962                         if (src_nritems - push_items < 8) {
1963                                 if (push_items <= 8)
1964                                         return 1;
1965                                 push_items -= 8;
1966                         }
1967                 }
1968         } else
1969                 push_items = min(src_nritems - 8, push_items);
1970
1971         copy_extent_buffer(dst, src,
1972                            btrfs_node_key_ptr_offset(dst_nritems),
1973                            btrfs_node_key_ptr_offset(0),
1974                            push_items * sizeof(struct btrfs_key_ptr));
1975
1976         if (push_items < src_nritems) {
1977                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1978                                       btrfs_node_key_ptr_offset(push_items),
1979                                       (src_nritems - push_items) *
1980                                       sizeof(struct btrfs_key_ptr));
1981         }
1982         btrfs_set_header_nritems(src, src_nritems - push_items);
1983         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1984         btrfs_mark_buffer_dirty(src);
1985         btrfs_mark_buffer_dirty(dst);
1986
1987         ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
1988         BUG_ON(ret);
1989
1990         return ret;
1991 }
1992
1993 /*
1994  * try to push data from one node into the next node right in the
1995  * tree.
1996  *
1997  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1998  * error, and > 0 if there was no room in the right hand block.
1999  *
2000  * this will only push up to 1/2 the contents of the left node over
2001  */
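/*
 * Example of the 1/2 limit above (illustrative numbers): with 20 pointers
 * in the source and 25 free slots in the destination, max_push is
 * 20 / 2 + 1 == 11, so at most 11 pointers move right and the source node
 * is never emptied by this helper.
 */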
2002 static int balance_node_right(struct btrfs_trans_handle *trans,
2003                               struct btrfs_root *root,
2004                               struct extent_buffer *dst,
2005                               struct extent_buffer *src)
2006 {
2007         int push_items = 0;
2008         int max_push;
2009         int src_nritems;
2010         int dst_nritems;
2011         int ret = 0;
2012
2013         WARN_ON(btrfs_header_generation(src) != trans->transid);
2014         WARN_ON(btrfs_header_generation(dst) != trans->transid);
2015
2016         src_nritems = btrfs_header_nritems(src);
2017         dst_nritems = btrfs_header_nritems(dst);
2018         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
2019         if (push_items <= 0)
2020                 return 1;
2021
2022         if (src_nritems < 4)
2023                 return 1;
2024
2025         max_push = src_nritems / 2 + 1;
2026         /* don't try to empty the node */
2027         if (max_push >= src_nritems)
2028                 return 1;
2029
2030         if (max_push < push_items)
2031                 push_items = max_push;
2032
2033         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
2034                                       btrfs_node_key_ptr_offset(0),
2035                                       (dst_nritems) *
2036                                       sizeof(struct btrfs_key_ptr));
2037
2038         copy_extent_buffer(dst, src,
2039                            btrfs_node_key_ptr_offset(0),
2040                            btrfs_node_key_ptr_offset(src_nritems - push_items),
2041                            push_items * sizeof(struct btrfs_key_ptr));
2042
2043         btrfs_set_header_nritems(src, src_nritems - push_items);
2044         btrfs_set_header_nritems(dst, dst_nritems + push_items);
2045
2046         btrfs_mark_buffer_dirty(src);
2047         btrfs_mark_buffer_dirty(dst);
2048
2049         ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
2050         BUG_ON(ret);
2051
2052         return ret;
2053 }
2054
2055 /*
2056  * helper function to insert a new root level in the tree.
2057  * A new node is allocated, and a single item is inserted to
2058  * point to the existing root
2059  *
2060  * returns zero on success or < 0 on failure.
2061  */
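/*
 * Before/after sketch for a level 1 insertion (illustrative):
 *
 *	before:   root pointer -> [ leaf ]
 *	after:    root pointer -> [ node, nritems == 1 ] -> [ leaf ]
 *
 * The new node takes the first key of the old root as its only key
 * pointer, so lookups keep working while the caller goes on to split the
 * now lower level block.
 */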
2062 static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2063                            struct btrfs_root *root,
2064                            struct btrfs_path *path, int level)
2065 {
2066         u64 lower_gen;
2067         struct extent_buffer *lower;
2068         struct extent_buffer *c;
2069         struct extent_buffer *old;
2070         struct btrfs_disk_key lower_key;
2071         int ret;
2072
2073         BUG_ON(path->nodes[level]);
2074         BUG_ON(path->nodes[level-1] != root->node);
2075
2076         lower = path->nodes[level-1];
2077         if (level == 1)
2078                 btrfs_item_key(lower, &lower_key, 0);
2079         else
2080                 btrfs_node_key(lower, &lower_key, 0);
2081
2082         c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2083                                    root->root_key.objectid, trans->transid,
2084                                    level, root->node->start, 0);
2085         if (IS_ERR(c))
2086                 return PTR_ERR(c);
2087
2088         memset_extent_buffer(c, 0, 0, root->nodesize);
2089         btrfs_set_header_nritems(c, 1);
2090         btrfs_set_header_level(c, level);
2091         btrfs_set_header_bytenr(c, c->start);
2092         btrfs_set_header_generation(c, trans->transid);
2093         btrfs_set_header_owner(c, root->root_key.objectid);
2094
2095         write_extent_buffer(c, root->fs_info->fsid,
2096                             (unsigned long)btrfs_header_fsid(c),
2097                             BTRFS_FSID_SIZE);
2098
2099         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
2100                             (unsigned long)btrfs_header_chunk_tree_uuid(c),
2101                             BTRFS_UUID_SIZE);
2102
2103         btrfs_set_node_key(c, &lower_key, 0);
2104         btrfs_set_node_blockptr(c, 0, lower->start);
2105         lower_gen = btrfs_header_generation(lower);
2106         WARN_ON(lower_gen != trans->transid);
2107
2108         btrfs_set_node_ptr_generation(c, 0, lower_gen);
2109
2110         btrfs_mark_buffer_dirty(c);
2111
2112         spin_lock(&root->node_lock);
2113         old = root->node;
2114         root->node = c;
2115         spin_unlock(&root->node_lock);
2116
2117         ret = btrfs_update_extent_ref(trans, root, lower->start,
2118                                       lower->start, c->start,
2119                                       root->root_key.objectid,
2120                                       trans->transid, level - 1);
2121         BUG_ON(ret);
2122
2123         /* the super has an extra ref to root->node */
2124         free_extent_buffer(old);
2125
2126         add_root_to_dirty_list(root);
2127         extent_buffer_get(c);
2128         path->nodes[level] = c;
2129         path->locks[level] = 1;
2130         path->slots[level] = 0;
2131         return 0;
2132 }
2133
2134 /*
2135  * worker function to insert a single pointer in a node.
2136  * the node should have enough room for the pointer already
2137  *
2138  * slot and level indicate where you want the key to go, and
2139  * blocknr is the block the key points to.
2140  *
2141  * returns zero on success and < 0 on any error
2142  */
2143 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
2144                       *root, struct btrfs_path *path, struct btrfs_disk_key
2145                       *key, u64 bytenr, int slot, int level)
2146 {
2147         struct extent_buffer *lower;
2148         int nritems;
2149
2150         BUG_ON(!path->nodes[level]);
2151         lower = path->nodes[level];
2152         nritems = btrfs_header_nritems(lower);
2153         if (slot > nritems)
2154                 BUG();
2155         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
2156                 BUG();
2157         if (slot != nritems) {
2158                 memmove_extent_buffer(lower,
2159                               btrfs_node_key_ptr_offset(slot + 1),
2160                               btrfs_node_key_ptr_offset(slot),
2161                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
2162         }
2163         btrfs_set_node_key(lower, key, slot);
2164         btrfs_set_node_blockptr(lower, slot, bytenr);
2165         WARN_ON(trans->transid == 0);
2166         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
2167         btrfs_set_header_nritems(lower, nritems + 1);
2168         btrfs_mark_buffer_dirty(lower);
2169         return 0;
2170 }
2171
2172 /*
2173  * split the node at the specified level in path in two.
2174  * The path is corrected to point to the appropriate node after the split
2175  *
2176  * Before splitting, this tries to make some room in the node by pushing
2177  * left and right; if either one works, it returns right away.
2178  *
2179  * returns 0 on success and < 0 on failure
2180  */
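/*
 * Example of the split point (illustrative counts): a node with 121 key
 * pointers is split at mid == (121 + 1) / 2 == 61, so 61 pointers stay in
 * the original block and 60 move to the new right hand block, and the
 * path is updated to whichever block now holds path->slots[level].
 */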
2181 static noinline int split_node(struct btrfs_trans_handle *trans,
2182                                struct btrfs_root *root,
2183                                struct btrfs_path *path, int level)
2184 {
2185         struct extent_buffer *c;
2186         struct extent_buffer *split;
2187         struct btrfs_disk_key disk_key;
2188         int mid;
2189         int ret;
2190         int wret;
2191         u32 c_nritems;
2192
2193         c = path->nodes[level];
2194         WARN_ON(btrfs_header_generation(c) != trans->transid);
2195         if (c == root->node) {
2196                 /* trying to split the root, let's make a new one */
2197                 ret = insert_new_root(trans, root, path, level + 1);
2198                 if (ret)
2199                         return ret;
2200         } else {
2201                 ret = push_nodes_for_insert(trans, root, path, level);
2202                 c = path->nodes[level];
2203                 if (!ret && btrfs_header_nritems(c) <
2204                     BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
2205                         return 0;
2206                 if (ret < 0)
2207                         return ret;
2208         }
2209
2210         c_nritems = btrfs_header_nritems(c);
2211
2212         split = btrfs_alloc_free_block(trans, root, root->nodesize,
2213                                         path->nodes[level + 1]->start,
2214                                         root->root_key.objectid,
2215                                         trans->transid, level, c->start, 0);
2216         if (IS_ERR(split))
2217                 return PTR_ERR(split);
2218
2219         btrfs_set_header_flags(split, btrfs_header_flags(c));
2220         btrfs_set_header_level(split, btrfs_header_level(c));
2221         btrfs_set_header_bytenr(split, split->start);
2222         btrfs_set_header_generation(split, trans->transid);
2223         btrfs_set_header_owner(split, root->root_key.objectid);
2224         btrfs_set_header_flags(split, 0);
2225         write_extent_buffer(split, root->fs_info->fsid,
2226                             (unsigned long)btrfs_header_fsid(split),
2227                             BTRFS_FSID_SIZE);
2228         write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
2229                             (unsigned long)btrfs_header_chunk_tree_uuid(split),
2230                             BTRFS_UUID_SIZE);
2231
2232         mid = (c_nritems + 1) / 2;
2233
2234         copy_extent_buffer(split, c,
2235                            btrfs_node_key_ptr_offset(0),
2236                            btrfs_node_key_ptr_offset(mid),
2237                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
2238         btrfs_set_header_nritems(split, c_nritems - mid);
2239         btrfs_set_header_nritems(c, mid);
2240         ret = 0;
2241
2242         btrfs_mark_buffer_dirty(c);
2243         btrfs_mark_buffer_dirty(split);
2244
2245         btrfs_node_key(split, &disk_key, 0);
2246         wret = insert_ptr(trans, root, path, &disk_key, split->start,
2247                           path->slots[level + 1] + 1,
2248                           level + 1);
2249         if (wret)
2250                 ret = wret;
2251
2252         ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
2253         BUG_ON(ret);
2254
2255         if (path->slots[level] >= mid) {
2256                 path->slots[level] -= mid;
2257                 btrfs_tree_unlock(c);
2258                 free_extent_buffer(c);
2259                 path->nodes[level] = split;
2260                 path->slots[level + 1] += 1;
2261         } else {
2262                 btrfs_tree_unlock(split);
2263                 free_extent_buffer(split);
2264         }
2265         return ret;
2266 }
2267
2268 /*
2269  * how many bytes are required to store the items in a leaf.  start
2270  * and nr indicate which items in the leaf to check.  This totals up the
2271  * space used both by the item structs and the item data
2272  */
2273 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
2274 {
2275         int data_len;
2276         int nritems = btrfs_header_nritems(l);
2277         int end = min(nritems, start + nr) - 1;
2278
2279         if (!nr)
2280                 return 0;
2281         data_len = btrfs_item_end_nr(l, start);
2282         data_len = data_len - btrfs_item_offset_nr(l, end);
2283         data_len += sizeof(struct btrfs_item) * nr;
2284         WARN_ON(data_len < 0);
2285         return data_len;
2286 }
2287
2288 /*
2289  * The space between the end of the leaf items and
2290  * the start of the leaf data.  IOW, how much room
2291  * the leaf has left for both items and data
2292  */
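/*
 * Worked example (illustrative sizes): in a leaf with 3900 bytes of data
 * space, 10 items whose payloads total 1000 bytes consume
 * 1000 + 10 * sizeof(struct btrfs_item) bytes, and the remainder is what
 * this helper reports as free for new items plus their data.
 */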
2293 noinline int btrfs_leaf_free_space(struct btrfs_root *root,
2294                                    struct extent_buffer *leaf)
2295 {
2296         int nritems = btrfs_header_nritems(leaf);
2297         int ret;
2298         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
2299         if (ret < 0) {
2300                 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
2301                        "used %d nritems %d\n",
2302                        ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
2303                        leaf_space_used(leaf, 0, nritems), nritems);
2304         }
2305         return ret;
2306 }
2307
2308 /*
2309  * push some data in the path leaf to the right, trying to free up at
2310  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2311  *
2312  * returns 1 if the push failed because the other node didn't have enough
2313  * room, 0 if everything worked out and < 0 if there were major errors.
2314  */
2315 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
2316                            *root, struct btrfs_path *path, int data_size,
2317                            int empty)
2318 {
2319         struct extent_buffer *left = path->nodes[0];
2320         struct extent_buffer *right;
2321         struct extent_buffer *upper;
2322         struct btrfs_disk_key disk_key;
2323         int slot;
2324         u32 i;
2325         int free_space;
2326         int push_space = 0;
2327         int push_items = 0;
2328         struct btrfs_item *item;
2329         u32 left_nritems;
2330         u32 nr;
2331         u32 right_nritems;
2332         u32 data_end;
2333         u32 this_item_size;
2334         int ret;
2335
2336         slot = path->slots[1];
2337         if (!path->nodes[1])
2338                 return 1;
2339
2340         upper = path->nodes[1];
2341         if (slot >= btrfs_header_nritems(upper) - 1)
2342                 return 1;
2343
2344         WARN_ON(!btrfs_tree_locked(path->nodes[1]));
2345
2346         right = read_node_slot(root, upper, slot + 1);
2347         btrfs_tree_lock(right);
2348         btrfs_set_lock_blocking(right);
2349
2350         free_space = btrfs_leaf_free_space(root, right);
2351         if (free_space < data_size)
2352                 goto out_unlock;
2353
2354         /* cow and double check */
2355         ret = btrfs_cow_block(trans, root, right, upper,
2356                               slot + 1, &right, 0);
2357         if (ret)
2358                 goto out_unlock;
2359
2360         free_space = btrfs_leaf_free_space(root, right);
2361         if (free_space < data_size)
2362                 goto out_unlock;
2363
2364         left_nritems = btrfs_header_nritems(left);
2365         if (left_nritems == 0)
2366                 goto out_unlock;
2367
2368         if (empty)
2369                 nr = 0;
2370         else
2371                 nr = 1;
2372
2373         if (path->slots[0] >= left_nritems)
2374                 push_space += data_size;
2375
2376         i = left_nritems - 1;
2377         while (i >= nr) {
2378                 item = btrfs_item_nr(left, i);
2379
2380                 if (!empty && push_items > 0) {
2381                         if (path->slots[0] > i)
2382                                 break;
2383                         if (path->slots[0] == i) {
2384                                 int space = btrfs_leaf_free_space(root, left);
2385                                 if (space + push_space * 2 > free_space)
2386                                         break;
2387                         }
2388                 }
2389
2390                 if (path->slots[0] == i)
2391                         push_space += data_size;
2392
2393                 if (!left->map_token) {
2394                         map_extent_buffer(left, (unsigned long)item,
2395                                         sizeof(struct btrfs_item),
2396                                         &left->map_token, &left->kaddr,
2397                                         &left->map_start, &left->map_len,
2398                                         KM_USER1);
2399                 }
2400
2401                 this_item_size = btrfs_item_size(left, item);
2402                 if (this_item_size + sizeof(*item) + push_space > free_space)
2403                         break;
2404
2405                 push_items++;
2406                 push_space += this_item_size + sizeof(*item);
2407                 if (i == 0)
2408                         break;
2409                 i--;
2410         }
2411         if (left->map_token) {
2412                 unmap_extent_buffer(left, left->map_token, KM_USER1);
2413                 left->map_token = NULL;
2414         }
2415
2416         if (push_items == 0)
2417                 goto out_unlock;
2418
2419         if (!empty && push_items == left_nritems)
2420                 WARN_ON(1);
2421
2422         /* push left to right */
2423         right_nritems = btrfs_header_nritems(right);
2424
2425         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
2426         push_space -= leaf_data_end(root, left);
2427
2428         /* make room in the right data area */
2429         data_end = leaf_data_end(root, right);
2430         memmove_extent_buffer(right,
2431                               btrfs_leaf_data(right) + data_end - push_space,
2432                               btrfs_leaf_data(right) + data_end,
2433                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
2434
2435         /* copy from the left data area */
2436         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
2437                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
2438                      btrfs_leaf_data(left) + leaf_data_end(root, left),
2439                      push_space);
2440
2441         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
2442                               btrfs_item_nr_offset(0),
2443                               right_nritems * sizeof(struct btrfs_item));
2444
2445         /* copy the items from left to right */
2446         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
2447                    btrfs_item_nr_offset(left_nritems - push_items),
2448                    push_items * sizeof(struct btrfs_item));
2449
2450         /* update the item pointers */
2451         right_nritems += push_items;
2452         btrfs_set_header_nritems(right, right_nritems);
2453         push_space = BTRFS_LEAF_DATA_SIZE(root);
2454         for (i = 0; i < right_nritems; i++) {
2455                 item = btrfs_item_nr(right, i);
2456                 if (!right->map_token) {
2457                         map_extent_buffer(right, (unsigned long)item,
2458                                         sizeof(struct btrfs_item),
2459                                         &right->map_token, &right->kaddr,
2460                                         &right->map_start, &right->map_len,
2461                                         KM_USER1);
2462                 }
2463                 push_space -= btrfs_item_size(right, item);
2464                 btrfs_set_item_offset(right, item, push_space);
2465         }
2466
2467         if (right->map_token) {
2468                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2469                 right->map_token = NULL;
2470         }
2471         left_nritems -= push_items;
2472         btrfs_set_header_nritems(left, left_nritems);
2473
2474         if (left_nritems)
2475                 btrfs_mark_buffer_dirty(left);
2476         btrfs_mark_buffer_dirty(right);
2477
2478         ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
2479         BUG_ON(ret);
2480
2481         btrfs_item_key(right, &disk_key, 0);
2482         btrfs_set_node_key(upper, &disk_key, slot + 1);
2483         btrfs_mark_buffer_dirty(upper);
2484
2485         /* then fixup the leaf pointer in the path */
2486         if (path->slots[0] >= left_nritems) {
2487                 path->slots[0] -= left_nritems;
2488                 if (btrfs_header_nritems(path->nodes[0]) == 0)
2489                         clean_tree_block(trans, root, path->nodes[0]);
2490                 btrfs_tree_unlock(path->nodes[0]);
2491                 free_extent_buffer(path->nodes[0]);
2492                 path->nodes[0] = right;
2493                 path->slots[1] += 1;
2494         } else {
2495                 btrfs_tree_unlock(right);
2496                 free_extent_buffer(right);
2497         }
2498         return 0;
2499
2500 out_unlock:
2501         btrfs_tree_unlock(right);
2502         free_extent_buffer(right);
2503         return 1;
2504 }
2505
2506 /*
2507  * push some data in the path leaf to the left, trying to free up at
2508  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2509  */
2510 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
2511                           *root, struct btrfs_path *path, int data_size,
2512                           int empty)
2513 {
2514         struct btrfs_disk_key disk_key;
2515         struct extent_buffer *right = path->nodes[0];
2516         struct extent_buffer *left;
2517         int slot;
2518         int i;
2519         int free_space;
2520         int push_space = 0;
2521         int push_items = 0;
2522         struct btrfs_item *item;
2523         u32 old_left_nritems;
2524         u32 right_nritems;
2525         u32 nr;
2526         int ret = 0;
2527         int wret;
2528         u32 this_item_size;
2529         u32 old_left_item_size;
2530
2531         slot = path->slots[1];
2532         if (slot == 0)
2533                 return 1;
2534         if (!path->nodes[1])
2535                 return 1;
2536
2537         right_nritems = btrfs_header_nritems(right);
2538         if (right_nritems == 0)
2539                 return 1;
2540
2541         WARN_ON(!btrfs_tree_locked(path->nodes[1]));
2542
2543         left = read_node_slot(root, path->nodes[1], slot - 1);
2544         btrfs_tree_lock(left);
2545         btrfs_set_lock_blocking(left);
2546
2547         free_space = btrfs_leaf_free_space(root, left);
2548         if (free_space < data_size) {
2549                 ret = 1;
2550                 goto out;
2551         }
2552
2553         /* cow and double check */
2554         ret = btrfs_cow_block(trans, root, left,
2555                               path->nodes[1], slot - 1, &left, 0);
2556         if (ret) {
2557                 /* we hit -ENOSPC, but it isn't fatal here */
2558                 ret = 1;
2559                 goto out;
2560         }
2561
2562         free_space = btrfs_leaf_free_space(root, left);
2563         if (free_space < data_size) {
2564                 ret = 1;
2565                 goto out;
2566         }
2567
2568         if (empty)
2569                 nr = right_nritems;
2570         else
2571                 nr = right_nritems - 1;
2572
2573         for (i = 0; i < nr; i++) {
2574                 item = btrfs_item_nr(right, i);
2575                 if (!right->map_token) {
2576                         map_extent_buffer(right, (unsigned long)item,
2577                                         sizeof(struct btrfs_item),
2578                                         &right->map_token, &right->kaddr,
2579                                         &right->map_start, &right->map_len,
2580                                         KM_USER1);
2581                 }
2582
2583                 if (!empty && push_items > 0) {
2584                         if (path->slots[0] < i)
2585                                 break;
2586                         if (path->slots[0] == i) {
2587                                 int space = btrfs_leaf_free_space(root, right);
2588                                 if (space + push_space * 2 > free_space)
2589                                         break;
2590                         }
2591                 }
2592
2593                 if (path->slots[0] == i)
2594                         push_space += data_size;
2595
2596                 this_item_size = btrfs_item_size(right, item);
2597                 if (this_item_size + sizeof(*item) + push_space > free_space)
2598                         break;
2599
2600                 push_items++;
2601                 push_space += this_item_size + sizeof(*item);
2602         }
2603
2604         if (right->map_token) {
2605                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2606                 right->map_token = NULL;
2607         }
2608
2609         if (push_items == 0) {
2610                 ret = 1;
2611                 goto out;
2612         }
2613         if (!empty && push_items == btrfs_header_nritems(right))
2614                 WARN_ON(1);
2615
2616         /* push data from right to left */
2617         copy_extent_buffer(left, right,
2618                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
2619                            btrfs_item_nr_offset(0),
2620                            push_items * sizeof(struct btrfs_item));
2621
2622         push_space = BTRFS_LEAF_DATA_SIZE(root) -
2623                      btrfs_item_offset_nr(right, push_items - 1);
2624
2625         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
2626                      leaf_data_end(root, left) - push_space,
2627                      btrfs_leaf_data(right) +
2628                      btrfs_item_offset_nr(right, push_items - 1),
2629                      push_space);
2630         old_left_nritems = btrfs_header_nritems(left);
2631         BUG_ON(old_left_nritems <= 0);
2632
2633         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
2634         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
2635                 u32 ioff;
2636
2637                 item = btrfs_item_nr(left, i);
2638                 if (!left->map_token) {
2639                         map_extent_buffer(left, (unsigned long)item,
2640                                         sizeof(struct btrfs_item),
2641                                         &left->map_token, &left->kaddr,
2642                                         &left->map_start, &left->map_len,
2643                                         KM_USER1);
2644                 }
2645
2646                 ioff = btrfs_item_offset(left, item);
2647                 btrfs_set_item_offset(left, item,
2648                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
2649         }
2650         btrfs_set_header_nritems(left, old_left_nritems + push_items);
2651         if (left->map_token) {
2652                 unmap_extent_buffer(left, left->map_token, KM_USER1);
2653                 left->map_token = NULL;
2654         }
2655
2656         /* fixup right node */
2657         if (push_items > right_nritems) {
2658                 printk(KERN_CRIT "push items %d nr %u\n", push_items,
2659                        right_nritems);
2660                 WARN_ON(1);
2661         }
2662
2663         if (push_items < right_nritems) {
2664                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
2665                                                   leaf_data_end(root, right);
2666                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
2667                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
2668                                       btrfs_leaf_data(right) +
2669                                       leaf_data_end(root, right), push_space);
2670
2671                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
2672                               btrfs_item_nr_offset(push_items),
2673                              (btrfs_header_nritems(right) - push_items) *
2674                              sizeof(struct btrfs_item));
2675         }
2676         right_nritems -= push_items;
2677         btrfs_set_header_nritems(right, right_nritems);
2678         push_space = BTRFS_LEAF_DATA_SIZE(root);
2679         for (i = 0; i < right_nritems; i++) {
2680                 item = btrfs_item_nr(right, i);
2681
2682                 if (!right->map_token) {
2683                         map_extent_buffer(right, (unsigned long)item,
2684                                         sizeof(struct btrfs_item),
2685                                         &right->map_token, &right->kaddr,
2686                                         &right->map_start, &right->map_len,
2687                                         KM_USER1);
2688                 }
2689
2690                 push_space = push_space - btrfs_item_size(right, item);
2691                 btrfs_set_item_offset(right, item, push_space);
2692         }
2693         if (right->map_token) {
2694                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2695                 right->map_token = NULL;
2696         }
2697
2698         btrfs_mark_buffer_dirty(left);
2699         if (right_nritems)
2700                 btrfs_mark_buffer_dirty(right);
2701
2702         ret = btrfs_update_ref(trans, root, right, left,
2703                                old_left_nritems, push_items);
2704         BUG_ON(ret);
2705
2706         btrfs_item_key(right, &disk_key, 0);
2707         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
2708         if (wret)
2709                 ret = wret;
2710
2711         /* then fixup the leaf pointer in the path */
2712         if (path->slots[0] < push_items) {
2713                 path->slots[0] += old_left_nritems;
2714                 if (btrfs_header_nritems(path->nodes[0]) == 0)
2715                         clean_tree_block(trans, root, path->nodes[0]);
2716                 btrfs_tree_unlock(path->nodes[0]);
2717                 free_extent_buffer(path->nodes[0]);
2718                 path->nodes[0] = left;
2719                 path->slots[1] -= 1;
2720         } else {
2721                 btrfs_tree_unlock(left);
2722                 free_extent_buffer(left);
2723                 path->slots[0] -= push_items;
2724         }
2725         BUG_ON(path->slots[0] < 0);
2726         return ret;
2727 out:
2728         btrfs_tree_unlock(left);
2729         free_extent_buffer(left);
2730         return ret;
2731 }
2732
2733 /*
2734  * split the path's leaf in two, making sure there are at least data_size
2735  * bytes free in the leaf the path points to afterwards.
2736  *
2737  * returns 0 if all went well and < 0 on failure.
2738  */
2739 static noinline int split_leaf(struct btrfs_trans_handle *trans,
2740                                struct btrfs_root *root,
2741                                struct btrfs_key *ins_key,
2742                                struct btrfs_path *path, int data_size,
2743                                int extend)
2744 {
2745         struct extent_buffer *l;
2746         u32 nritems;
2747         int mid;
2748         int slot;
2749         struct extent_buffer *right;
2750         int data_copy_size;
2751         int rt_data_off;
2752         int i;
2753         int ret = 0;
2754         int wret;
2755         int double_split;
2756         int num_doubles = 0;
2757         struct btrfs_disk_key disk_key;
2758
2759         /* first try to make some room by pushing left and right */
2760         if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
2761                 wret = push_leaf_right(trans, root, path, data_size, 0);
2762                 if (wret < 0)
2763                         return wret;
2764                 if (wret) {
2765                         wret = push_leaf_left(trans, root, path, data_size, 0);
2766                         if (wret < 0)
2767                                 return wret;
2768                 }
2769                 l = path->nodes[0];
2770
2771                 /* did the pushes work? */
2772                 if (btrfs_leaf_free_space(root, l) >= data_size)
2773                         return 0;
2774         }
2775
2776         if (!path->nodes[1]) {
2777                 ret = insert_new_root(trans, root, path, 1);
2778                 if (ret)
2779                         return ret;
2780         }
2781 again:
2782         double_split = 0;
2783         l = path->nodes[0];
2784         slot = path->slots[0];
2785         nritems = btrfs_header_nritems(l);
2786         mid = (nritems + 1) / 2;
2787
2788         right = btrfs_alloc_free_block(trans, root, root->leafsize,
2789                                         path->nodes[1]->start,
2790                                         root->root_key.objectid,
2791                                         trans->transid, 0, l->start, 0);
2792         if (IS_ERR(right)) {
2793                 BUG_ON(1);
2794                 return PTR_ERR(right);
2795         }
2796
2797         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2798         btrfs_set_header_bytenr(right, right->start);
2799         btrfs_set_header_generation(right, trans->transid);
2800         btrfs_set_header_owner(right, root->root_key.objectid);
2801         btrfs_set_header_level(right, 0);
2802         write_extent_buffer(right, root->fs_info->fsid,
2803                             (unsigned long)btrfs_header_fsid(right),
2804                             BTRFS_FSID_SIZE);
2805
2806         write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
2807                             (unsigned long)btrfs_header_chunk_tree_uuid(right),
2808                             BTRFS_UUID_SIZE);
2809         if (mid <= slot) {
2810                 if (nritems == 1 ||
2811                     leaf_space_used(l, mid, nritems - mid) + data_size >
2812                         BTRFS_LEAF_DATA_SIZE(root)) {
2813                         if (slot >= nritems) {
2814                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2815                                 btrfs_set_header_nritems(right, 0);
2816                                 wret = insert_ptr(trans, root, path,
2817                                                   &disk_key, right->start,
2818                                                   path->slots[1] + 1, 1);
2819                                 if (wret)
2820                                         ret = wret;
2821
2822                                 btrfs_tree_unlock(path->nodes[0]);
2823                                 free_extent_buffer(path->nodes[0]);
2824                                 path->nodes[0] = right;
2825                                 path->slots[0] = 0;
2826                                 path->slots[1] += 1;
2827                                 btrfs_mark_buffer_dirty(right);
2828                                 return ret;
2829                         }
2830                         mid = slot;
2831                         if (mid != nritems &&
2832                             leaf_space_used(l, mid, nritems - mid) +
2833                             data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2834                                 double_split = 1;
2835                         }
2836                 }
2837         } else {
2838                 if (leaf_space_used(l, 0, mid) + data_size >
2839                         BTRFS_LEAF_DATA_SIZE(root)) {
2840                         if (!extend && data_size && slot == 0) {
2841                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2842                                 btrfs_set_header_nritems(right, 0);
2843                                 wret = insert_ptr(trans, root, path,
2844                                                   &disk_key,
2845                                                   right->start,
2846                                                   path->slots[1], 1);
2847                                 if (wret)
2848                                         ret = wret;
2849                                 btrfs_tree_unlock(path->nodes[0]);
2850                                 free_extent_buffer(path->nodes[0]);
2851                                 path->nodes[0] = right;
2852                                 path->slots[0] = 0;
2853                                 if (path->slots[1] == 0) {
2854                                         wret = fixup_low_keys(trans, root,
2855                                                       path, &disk_key, 1);
2856                                         if (wret)
2857                                                 ret = wret;
2858                                 }
2859                                 btrfs_mark_buffer_dirty(right);
2860                                 return ret;
2861                         } else if ((extend || !data_size) && slot == 0) {
2862                                 mid = 1;
2863                         } else {
2864                                 mid = slot;
2865                                 if (mid != nritems &&
2866                                     leaf_space_used(l, mid, nritems - mid) +
2867                                     data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2868                                         double_split = 1;
2869                                 }
2870                         }
2871                 }
2872         }
2873         nritems = nritems - mid;
2874         btrfs_set_header_nritems(right, nritems);
2875         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2876
2877         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2878                            btrfs_item_nr_offset(mid),
2879                            nritems * sizeof(struct btrfs_item));
2880
2881         copy_extent_buffer(right, l,
2882                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2883                      data_copy_size, btrfs_leaf_data(l) +
2884                      leaf_data_end(root, l), data_copy_size);
2885
2886         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2887                       btrfs_item_end_nr(l, mid);
2888
2889         for (i = 0; i < nritems; i++) {
2890                 struct btrfs_item *item = btrfs_item_nr(right, i);
2891                 u32 ioff;
2892
2893                 if (!right->map_token) {
2894                         map_extent_buffer(right, (unsigned long)item,
2895                                         sizeof(struct btrfs_item),
2896                                         &right->map_token, &right->kaddr,
2897                                         &right->map_start, &right->map_len,
2898                                         KM_USER1);
2899                 }
2900
2901                 ioff = btrfs_item_offset(right, item);
2902                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2903         }
2904
2905         if (right->map_token) {
2906                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2907                 right->map_token = NULL;
2908         }
2909
2910         btrfs_set_header_nritems(l, mid);
2911         ret = 0;
2912         btrfs_item_key(right, &disk_key, 0);
2913         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2914                           path->slots[1] + 1, 1);
2915         if (wret)
2916                 ret = wret;
2917
2918         btrfs_mark_buffer_dirty(right);
2919         btrfs_mark_buffer_dirty(l);
2920         BUG_ON(path->slots[0] != slot);
2921
2922         ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
2923         BUG_ON(ret);
2924
2925         if (mid <= slot) {
2926                 btrfs_tree_unlock(path->nodes[0]);
2927                 free_extent_buffer(path->nodes[0]);
2928                 path->nodes[0] = right;
2929                 path->slots[0] -= mid;
2930                 path->slots[1] += 1;
2931         } else {
2932                 btrfs_tree_unlock(right);
2933                 free_extent_buffer(right);
2934         }
2935
2936         BUG_ON(path->slots[0] < 0);
2937
2938         if (double_split) {
2939                 BUG_ON(num_doubles != 0);
2940                 num_doubles++;
2941                 goto again;
2942         }
2943         return ret;
2944 }
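
/*
 * A minimal, hypothetical sketch of how callers end up here: split_leaf()
 * is static and is only reached through btrfs_search_slot() when a
 * positive ins_len is passed and the target leaf is too full.  The helper
 * name below and its parameters are illustrative, not part of the btrfs
 * API; the block is compiled out.
 */
#if 0
static int example_reserve_leaf_room(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct btrfs_path *path,
                                     struct btrfs_key *key, u32 data_size)
{
        int ret;

        /*
         * ins_len covers the item header plus the data; cow=1 lets the
         * search COW and, via split_leaf, split the leaf as needed.
         */
        ret = btrfs_search_slot(trans, root, key, path,
                                data_size + sizeof(struct btrfs_item), 1);
        if (ret < 0)
                return ret;

        /* the leaf at path->nodes[0] now has room for the new item */
        WARN_ON(btrfs_leaf_free_space(root, path->nodes[0]) <
                (int)(data_size + sizeof(struct btrfs_item)));
        return ret;
}
#endif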
2945
2946 /*
2947  * This function splits a single item into two items,
2948  * giving 'new_key' to the new item and splitting the
2949  * old one at split_offset (from the start of the item).
2950  *
2951  * The path may be released by this operation.  After
2952  * the split, the path is pointing to the old item.  The
2953  * new item is going to be in the same node as the old one.
2954  *
2955  * Note, the item being split must be small enough to live alone on
2956  * a tree block with room for one extra struct btrfs_item
2957  *
2958  * This allows us to split the item in place, keeping a lock on the
2959  * leaf the entire time.
2960  */
2961 int btrfs_split_item(struct btrfs_trans_handle *trans,
2962                      struct btrfs_root *root,
2963                      struct btrfs_path *path,
2964                      struct btrfs_key *new_key,
2965                      unsigned long split_offset)
2966 {
2967         u32 item_size;
2968         struct extent_buffer *leaf;
2969         struct btrfs_key orig_key;
2970         struct btrfs_item *item;
2971         struct btrfs_item *new_item;
2972         int ret = 0;
2973         int slot;
2974         u32 nritems;
2975         u32 orig_offset;
2976         struct btrfs_disk_key disk_key;
2977         char *buf;
2978
2979         leaf = path->nodes[0];
2980         btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
2981         if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
2982                 goto split;
2983
2984         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2985         btrfs_release_path(root, path);
2986
2987         path->search_for_split = 1;
2988         path->keep_locks = 1;
2989
2990         ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
2991         path->search_for_split = 0;
2992
2993         /* if our item isn't there or got smaller, return now */
2994         if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
2995                                                         path->slots[0])) {
2996                 path->keep_locks = 0;
2997                 return -EAGAIN;
2998         }
2999
3000         ret = split_leaf(trans, root, &orig_key, path,
3001                          sizeof(struct btrfs_item), 1);
3002         path->keep_locks = 0;
3003         BUG_ON(ret);
3004
3005         /*
3006          * make sure any changes to the path from split_leaf leave it
3007          * in a blocking state
3008          */
3009         btrfs_set_path_blocking(path);
3010
3011         leaf = path->nodes[0];
3012         BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3013
3014 split:
3015         item = btrfs_item_nr(leaf, path->slots[0]);
3016         orig_offset = btrfs_item_offset(leaf, item);
3017         item_size = btrfs_item_size(leaf, item);
3018
3019
3020         buf = kmalloc(item_size, GFP_NOFS);
             if (!buf)
                     return -ENOMEM;
3021         read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3022                             path->slots[0]), item_size);
3023         slot = path->slots[0] + 1;
3024         leaf = path->nodes[0];
3025
3026         nritems = btrfs_header_nritems(leaf);
3027
3028         if (slot != nritems) {
3029                 /* shift the items */
3030                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3031                               btrfs_item_nr_offset(slot),
3032                               (nritems - slot) * sizeof(struct btrfs_item));
3033
3034         }
3035
3036         btrfs_cpu_key_to_disk(&disk_key, new_key);
3037         btrfs_set_item_key(leaf, &disk_key, slot);
3038
3039         new_item = btrfs_item_nr(leaf, slot);
3040
3041         btrfs_set_item_offset(leaf, new_item, orig_offset);
3042         btrfs_set_item_size(leaf, new_item, item_size - split_offset);
3043
3044         btrfs_set_item_offset(leaf, item,
3045                               orig_offset + item_size - split_offset);
3046         btrfs_set_item_size(leaf, item, split_offset);
3047
3048         btrfs_set_header_nritems(leaf, nritems + 1);
3049
3050         /* write the data for the start of the original item */
3051         write_extent_buffer(leaf, buf,
3052                             btrfs_item_ptr_offset(leaf, path->slots[0]),
3053                             split_offset);
3054
3055         /* write the data for the new item */
3056         write_extent_buffer(leaf, buf + split_offset,
3057                             btrfs_item_ptr_offset(leaf, slot),
3058                             item_size - split_offset);
3059         btrfs_mark_buffer_dirty(leaf);
3060
3061         ret = 0;
3062         if (btrfs_leaf_free_space(root, leaf) < 0) {
3063                 btrfs_print_leaf(root, leaf);
3064                 BUG();
3065         }
3066         kfree(buf);
3067         return ret;
3068 }
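
/*
 * A hypothetical sketch of using btrfs_split_item(): cut the EXTENT_DATA
 * item the path points at in two at split_off bytes, giving the second
 * half a key with a new offset.  The helper name and parameters are
 * illustrative only; the block is compiled out.
 */
#if 0
static int example_split_extent_item(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct btrfs_path *path,
                                     u64 objectid, u64 new_off,
                                     unsigned long split_off)
{
        struct btrfs_key new_key;

        /* path->slots[0] must already point at the item being split */
        new_key.objectid = objectid;
        new_key.type = BTRFS_EXTENT_DATA_KEY;
        new_key.offset = new_off;

        /* may return -EAGAIN if the item changed while making room */
        return btrfs_split_item(trans, root, path, &new_key, split_off);
}
#endif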
3069
3070 /*
3071  * make the item pointed to by the path smaller.  new_size indicates
3072  * how small to make it, and from_end tells us if we just chop bytes
3073  * off the end of the item or if we shift the item to chop bytes off
3074  * the front.
3075  */
3076 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3077                         struct btrfs_root *root,
3078                         struct btrfs_path *path,
3079                         u32 new_size, int from_end)
3080 {
3081         int ret = 0;
3082         int slot;
3083         int slot_orig;
3084         struct extent_buffer *leaf;
3085         struct btrfs_item *item;
3086         u32 nritems;
3087         unsigned int data_end;
3088         unsigned int old_data_start;
3089         unsigned int old_size;
3090         unsigned int size_diff;
3091         int i;
3092
3093         slot_orig = path->slots[0];
3094         leaf = path->nodes[0];
3095         slot = path->slots[0];
3096
3097         old_size = btrfs_item_size_nr(leaf, slot);
3098         if (old_size == new_size)
3099                 return 0;
3100
3101         nritems = btrfs_header_nritems(leaf);
3102         data_end = leaf_data_end(root, leaf);
3103
3104         old_data_start = btrfs_item_offset_nr(leaf, slot);
3105
3106         size_diff = old_size - new_size;
3107
3108         BUG_ON(slot < 0);
3109         BUG_ON(slot >= nritems);
3110
3111         /*
3112          * item0..itemN ... dataN.offset..dataN.size .. data0.size
3113          */
3114         /* first correct the data pointers */
3115         for (i = slot; i < nritems; i++) {
3116                 u32 ioff;
3117                 item = btrfs_item_nr(leaf, i);
3118
3119                 if (!leaf->map_token) {
3120                         map_extent_buffer(leaf, (unsigned long)item,
3121                                         sizeof(struct btrfs_item),
3122                                         &leaf->map_token, &leaf->kaddr,
3123                                         &leaf->map_start, &leaf->map_len,
3124                                         KM_USER1);
3125                 }
3126
3127                 ioff = btrfs_item_offset(leaf, item);
3128                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
3129         }
3130
3131         if (leaf->map_token) {
3132                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3133                 leaf->map_token = NULL;
3134         }
3135
3136         /* shift the data */
3137         if (from_end) {
3138                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3139                               data_end + size_diff, btrfs_leaf_data(leaf) +
3140                               data_end, old_data_start + new_size - data_end);
3141         } else {
3142                 struct btrfs_disk_key disk_key;
3143                 u64 offset;
3144
3145                 btrfs_item_key(leaf, &disk_key, slot);
3146
3147                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
3148                         unsigned long ptr;
3149                         struct btrfs_file_extent_item *fi;
3150
3151                         fi = btrfs_item_ptr(leaf, slot,
3152                                             struct btrfs_file_extent_item);
3153                         fi = (struct btrfs_file_extent_item *)(
3154                              (unsigned long)fi - size_diff);
3155
3156                         if (btrfs_file_extent_type(leaf, fi) ==
3157                             BTRFS_FILE_EXTENT_INLINE) {
3158                                 ptr = btrfs_item_ptr_offset(leaf, slot);
3159                                 memmove_extent_buffer(leaf, ptr,
3160                                       (unsigned long)fi,
3161                                       offsetof(struct btrfs_file_extent_item,
3162                                                  disk_bytenr));
3163                         }
3164                 }
3165
3166                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3167                               data_end + size_diff, btrfs_leaf_data(leaf) +
3168                               data_end, old_data_start - data_end);
3169
3170                 offset = btrfs_disk_key_offset(&disk_key);
3171                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
3172                 btrfs_set_item_key(leaf, &disk_key, slot);
3173                 if (slot == 0)
3174                         fixup_low_keys(trans, root, path, &disk_key, 1);
3175         }
3176
3177         item = btrfs_item_nr(leaf, slot);
3178         btrfs_set_item_size(leaf, item, new_size);
3179         btrfs_mark_buffer_dirty(leaf);
3180
3181         ret = 0;
3182         if (btrfs_leaf_free_space(root, leaf) < 0) {
3183                 btrfs_print_leaf(root, leaf);
3184                 BUG();
3185         }
3186         return ret;
3187 }
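
/*
 * A hypothetical sketch of btrfs_truncate_item() in use: shrink the item
 * the path points at down to new_size bytes, chopping from the end.  It
 * assumes the search that positioned the path used cow=1 so the leaf is
 * writable; the helper name is illustrative and the block is compiled out.
 */
#if 0
static int example_shrink_item(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
                               struct btrfs_path *path, u32 new_size)
{
        struct extent_buffer *leaf = path->nodes[0];

        /* nothing to do if the item is already small enough */
        if (btrfs_item_size_nr(leaf, path->slots[0]) <= new_size)
                return 0;
        return btrfs_truncate_item(trans, root, path, new_size, 1);
}
#endif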
3188
3189 /*
3190  * make the item pointed to by the path bigger, data_size is the new size.
3191  */
3192 int btrfs_extend_item(struct btrfs_trans_handle *trans,
3193                       struct btrfs_root *root, struct btrfs_path *path,
3194                       u32 data_size)
3195 {
3196         int ret = 0;
3197         int slot;
3198         int slot_orig;
3199         struct extent_buffer *leaf;
3200         struct btrfs_item *item;
3201         u32 nritems;
3202         unsigned int data_end;
3203         unsigned int old_data;
3204         unsigned int old_size;
3205         int i;
3206
3207         slot_orig = path->slots[0];
3208         leaf = path->nodes[0];
3209
3210         nritems = btrfs_header_nritems(leaf);
3211         data_end = leaf_data_end(root, leaf);
3212
3213         if (btrfs_leaf_free_space(root, leaf) < data_size) {
3214                 btrfs_print_leaf(root, leaf);
3215                 BUG();
3216         }
3217         slot = path->slots[0];
3218         old_data = btrfs_item_end_nr(leaf, slot);
3219
3220         BUG_ON(slot < 0);
3221         if (slot >= nritems) {
3222                 btrfs_print_leaf(root, leaf);
3223                 printk(KERN_CRIT "slot %d too large, nritems %d\n",
3224                        slot, nritems);
3225                 BUG_ON(1);
3226         }
3227
3228         /*
3229          * item0..itemN ... dataN.offset..dataN.size .. data0.size
3230          */
3231         /* first correct the data pointers */
3232         for (i = slot; i < nritems; i++) {
3233                 u32 ioff;
3234                 item = btrfs_item_nr(leaf, i);
3235
3236                 if (!leaf->map_token) {
3237                         map_extent_buffer(leaf, (unsigned long)item,
3238                                         sizeof(struct btrfs_item),
3239                                         &leaf->map_token, &leaf->kaddr,
3240                                         &leaf->map_start, &leaf->map_len,
3241                                         KM_USER1);
3242                 }
3243                 ioff = btrfs_item_offset(leaf, item);
3244                 btrfs_set_item_offset(leaf, item, ioff - data_size);
3245         }
3246
3247         if (leaf->map_token) {
3248                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3249                 leaf->map_token = NULL;
3250         }
3251
3252         /* shift the data */
3253         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3254                       data_end - data_size, btrfs_leaf_data(leaf) +
3255                       data_end, old_data - data_end);
3256
3257         data_end = old_data;
3258         old_size = btrfs_item_size_nr(leaf, slot);
3259         item = btrfs_item_nr(leaf, slot);
3260         btrfs_set_item_size(leaf, item, old_size + data_size);
3261         btrfs_mark_buffer_dirty(leaf);
3262
3263         ret = 0;
3264         if (btrfs_leaf_free_space(root, leaf) < 0) {
3265                 btrfs_print_leaf(root, leaf);
3266                 BUG();
3267         }
3268         return ret;
3269 }
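
/*
 * A hypothetical sketch of the usual btrfs_extend_item() pattern: ask
 * btrfs_search_slot() for 'extra' bytes of room, grow the existing item,
 * then append the new bytes at its end.  The helper name and the 'data'
 * parameter are illustrative only; the block is compiled out.
 */
#if 0
static int example_grow_item(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct btrfs_path *path,
                             struct btrfs_key *key,
                             const void *data, u32 extra)
{
        struct extent_buffer *leaf;
        unsigned long ptr;
        u32 old_size;
        int ret;

        /* make sure the leaf can take 'extra' more bytes */
        ret = btrfs_search_slot(trans, root, key, path, extra, 1);
        if (ret < 0)
                return ret;
        if (ret > 0)
                return -ENOENT;

        leaf = path->nodes[0];
        old_size = btrfs_item_size_nr(leaf, path->slots[0]);
        ret = btrfs_extend_item(trans, root, path, extra);
        if (ret)
                return ret;

        /* the old bytes keep their place, the new ones land at the end */
        ptr = btrfs_item_ptr_offset(leaf, path->slots[0]) + old_size;
        write_extent_buffer(leaf, data, ptr, extra);
        btrfs_mark_buffer_dirty(leaf);
        return 0;
}
#endif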
3270
3271 /*
3272  * Given a key and some data, insert items into the tree.
3273  * This does all the path init required, making room in the tree if needed.
3274  * Returns the number of keys that were inserted.
3275  */
3276 int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3277                             struct btrfs_root *root,
3278                             struct btrfs_path *path,
3279                             struct btrfs_key *cpu_key, u32 *data_size,
3280                             int nr)
3281 {
3282         struct extent_buffer *leaf;
3283         struct btrfs_item *item;
3284         int ret = 0;
3285         int slot;
3286         int i;
3287         u32 nritems;
3288         u32 total_data = 0;
3289         u32 total_size = 0;
3290         unsigned int data_end;
3291         struct btrfs_disk_key disk_key;
3292         struct btrfs_key found_key;
3293
3294         for (i = 0; i < nr; i++) {
3295                 if (total_size + data_size[i] + sizeof(struct btrfs_item) >
3296                     BTRFS_LEAF_DATA_SIZE(root)) {
3297                         nr = i;
3298                         break;
3299                 }
3300                 total_data += data_size[i];
3301                 total_size += data_size[i] + sizeof(struct btrfs_item);
3302         }
3303         BUG_ON(nr == 0);
3304
3305         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3306         if (ret == 0)
3307                 return -EEXIST;
3308         if (ret < 0)
3309                 goto out;
3310
3311         leaf = path->nodes[0];
3312
3313         nritems = btrfs_header_nritems(leaf);
3314         data_end = leaf_data_end(root, leaf);
3315
3316         if (btrfs_leaf_free_space(root, leaf) < total_size) {
3317                 for (i = nr - 1; i >= 0; i--) {
3318                         total_data -= data_size[i];
3319                         total_size -= data_size[i] + sizeof(struct btrfs_item);
3320                         if (total_size < btrfs_leaf_free_space(root, leaf))
3321                                 break;
3322                 }
3323                 nr = i;
3324         }
3325
3326         slot = path->slots[0];
3327         BUG_ON(slot < 0);
3328
3329         if (slot != nritems) {
3330                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3331
3332                 item = btrfs_item_nr(leaf, slot);
3333                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3334
3335                 /* figure out how many keys we can insert in here */
3336                 total_data = data_size[0];
3337                 for (i = 1; i < nr; i++) {
3338                         if (comp_cpu_keys(&found_key, cpu_key + i) <= 0)
3339                                 break;
3340                         total_data += data_size[i];
3341                 }
3342                 nr = i;
3343
3344                 if (old_data < data_end) {
3345                         btrfs_print_leaf(root, leaf);
3346                         printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
3347                                slot, old_data, data_end);
3348                         BUG_ON(1);
3349                 }
3350                 /*
3351                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
3352                  */
3353                 /* first correct the data pointers */
3354                 WARN_ON(leaf->map_token);
3355                 for (i = slot; i < nritems; i++) {
3356                         u32 ioff;
3357
3358                         item = btrfs_item_nr(leaf, i);
3359                         if (!leaf->map_token) {
3360                                 map_extent_buffer(leaf, (unsigned long)item,
3361                                         sizeof(struct btrfs_item),
3362                                         &leaf->map_token, &leaf->kaddr,
3363                                         &leaf->map_start, &leaf->map_len,
3364                                         KM_USER1);
3365                         }
3366
3367                         ioff = btrfs_item_offset(leaf, item);
3368                         btrfs_set_item_offset(leaf, item, ioff - total_data);
3369                 }
3370                 if (leaf->map_token) {
3371                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3372                         leaf->map_token = NULL;
3373                 }
3374
3375                 /* shift the items */
3376                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3377                               btrfs_item_nr_offset(slot),
3378                               (nritems - slot) * sizeof(struct btrfs_item));
3379
3380                 /* shift the data */
3381                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3382                               data_end - total_data, btrfs_leaf_data(leaf) +
3383                               data_end, old_data - data_end);
3384                 data_end = old_data;
3385         } else {
3386                 /*
3387                  * this sucks but it has to be done: if we are inserting at
3388                  * the end of the leaf, only insert 1 of the items, since we
3389                  * have no way of knowing what's on the next leaf and we'd have
3390                  * to drop our current locks to figure it out
3391                  */
3392                 nr = 1;
3393         }
3394
3395         /* setup the item for the new data */
3396         for (i = 0; i < nr; i++) {
3397                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3398                 btrfs_set_item_key(leaf, &disk_key, slot + i);
3399                 item = btrfs_item_nr(leaf, slot + i);
3400                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3401                 data_end -= data_size[i];
3402                 btrfs_set_item_size(leaf, item, data_size[i]);
3403         }
3404         btrfs_set_header_nritems(leaf, nritems + nr);
3405         btrfs_mark_buffer_dirty(leaf);
3406
3407         ret = 0;
3408         if (slot == 0) {
3409                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3410                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3411         }
3412
3413         if (btrfs_leaf_free_space(root, leaf) < 0) {
3414                 btrfs_print_leaf(root, leaf);
3415                 BUG();
3416         }
3417 out:
3418         if (!ret)
3419                 ret = nr;
3420         return ret;
3421 }
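
/*
 * A hypothetical sketch of a batch insert with btrfs_insert_some_items():
 * the return value is the number of keys that actually fit, and only that
 * many payloads get copied in.  The helper name and the keys/sizes/bufs
 * parameters are illustrative only; the block is compiled out.
 */
#if 0
static int example_insert_batch(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                struct btrfs_path *path,
                                struct btrfs_key *keys, u32 *sizes,
                                char **bufs, int nr)
{
        struct extent_buffer *leaf;
        unsigned long ptr;
        int ret;
        int i;

        ret = btrfs_insert_some_items(trans, root, path, keys, sizes, nr);
        if (ret < 0)
                return ret;

        /* path->slots[0] is the first of the 'ret' new, empty items */
        leaf = path->nodes[0];
        for (i = 0; i < ret; i++) {
                ptr = btrfs_item_ptr_offset(leaf, path->slots[0] + i);
                write_extent_buffer(leaf, bufs[i], ptr, sizes[i]);
        }
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(root, path);
        return ret;
}
#endif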
3422
3423 /*
3424  * Given a key and some data, insert items into the tree.
3425  * This does all the path init required, making room in the tree if needed.
3426  */
3427 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3428                             struct btrfs_root *root,
3429                             struct btrfs_path *path,
3430                             struct btrfs_key *cpu_key, u32 *data_size,
3431                             int nr)
3432 {
3433         struct extent_buffer *leaf;
3434         struct btrfs_item *item;
3435         int ret = 0;
3436         int slot;
3437         int slot_orig;
3438         int i;
3439         u32 nritems;
3440         u32 total_size = 0;
3441         u32 total_data = 0;
3442         unsigned int data_end;
3443         struct btrfs_disk_key disk_key;
3444
3445         for (i = 0; i < nr; i++)
3446                 total_data += data_size[i];
3447
3448         total_size = total_data + (nr * sizeof(struct btrfs_item));
3449         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3450         if (ret == 0)
3451                 return -EEXIST;
3452         if (ret < 0)
3453                 goto out;
3454
3455         slot_orig = path->slots[0];
3456         leaf = path->nodes[0];
3457
3458         nritems = btrfs_header_nritems(leaf);
3459         data_end = leaf_data_end(root, leaf);
3460
3461         if (btrfs_leaf_free_space(root, leaf) < total_size) {
3462                 btrfs_print_leaf(root, leaf);
3463                 printk(KERN_CRIT "not enough freespace need %u have %d\n",
3464                        total_size, btrfs_leaf_free_space(root, leaf));
3465                 BUG();
3466         }
3467
3468         slot = path->slots[0];
3469         BUG_ON(slot < 0);
3470
3471         if (slot != nritems) {
3472                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3473
3474                 if (old_data < data_end) {
3475                         btrfs_print_leaf(root, leaf);
3476                         printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
3477                                slot, old_data, data_end);
3478                         BUG_ON(1);
3479                 }
3480                 /*
3481                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
3482                  */
3483                 /* first correct the data pointers */
3484                 WARN_ON(leaf->map_token);
3485                 for (i = slot; i < nritems; i++) {
3486                         u32 ioff;
3487
3488                         item = btrfs_item_nr(leaf, i);
3489                         if (!leaf->map_token) {
3490                                 map_extent_buffer(leaf, (unsigned long)item,
3491                                         sizeof(struct btrfs_item),
3492                                         &leaf->map_token, &leaf->kaddr,
3493                                         &leaf->map_start, &leaf->map_len,
3494                                         KM_USER1);
3495                         }
3496
3497                         ioff = btrfs_item_offset(leaf, item);
3498                         btrfs_set_item_offset(leaf, item, ioff - total_data);
3499                 }
3500                 if (leaf->map_token) {
3501                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3502                         leaf->map_token = NULL;
3503                 }
3504
3505                 /* shift the items */
3506                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3507                               btrfs_item_nr_offset(slot),
3508                               (nritems - slot) * sizeof(struct btrfs_item));
3509
3510                 /* shift the data */
3511                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3512                               data_end - total_data, btrfs_leaf_data(leaf) +
3513                               data_end, old_data - data_end);
3514                 data_end = old_data;
3515         }
3516
3517         /* setup the item for the new data */
3518         for (i = 0; i < nr; i++) {
3519                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3520                 btrfs_set_item_key(leaf, &disk_key, slot + i);
3521                 item = btrfs_item_nr(leaf, slot + i);
3522                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3523                 data_end -= data_size[i];
3524                 btrfs_set_item_size(leaf, item, data_size[i]);
3525         }
3526         btrfs_set_header_nritems(leaf, nritems + nr);
3527         btrfs_mark_buffer_dirty(leaf);
3528
3529         ret = 0;
3530         if (slot == 0) {
3531                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3532                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3533         }
3534
3535         if (btrfs_leaf_free_space(root, leaf) < 0) {
3536                 btrfs_print_leaf(root, leaf);
3537                 BUG();
3538         }
3539 out:
3540         btrfs_unlock_up_safe(path, 1);
3541         return ret;
3542 }
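
/*
 * A hypothetical sketch of reserving room for two adjacent items in one
 * btrfs_insert_empty_items() call and then copying the payloads in.  The
 * keys must already be in tree order; the helper name and parameters are
 * illustrative only, and the block is compiled out.
 */
#if 0
static int example_insert_two(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct btrfs_key *keys,
                              void *data0, u32 len0,
                              void *data1, u32 len1)
{
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        u32 sizes[2];
        int ret;

        sizes[0] = len0;
        sizes[1] = len1;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_insert_empty_items(trans, root, path, keys, sizes, 2);
        if (ret)
                goto out;

        leaf = path->nodes[0];
        write_extent_buffer(leaf, data0,
                            btrfs_item_ptr_offset(leaf, path->slots[0]), len0);
        write_extent_buffer(leaf, data1,
                            btrfs_item_ptr_offset(leaf, path->slots[0] + 1),
                            len1);
        btrfs_mark_buffer_dirty(leaf);
out:
        btrfs_free_path(path);
        return ret;
}
#endif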
3543
3544 /*
3545  * Given a key and some data, insert an item into the tree.
3546  * This does all the path init required, making room in the tree if needed.
3547  */
3548 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3549                       *root, struct btrfs_key *cpu_key, void *data, u32
3550                       data_size)
3551 {
3552         int ret = 0;
3553         struct btrfs_path *path;
3554         struct extent_buffer *leaf;
3555         unsigned long ptr;
3556
3557         path = btrfs_alloc_path();
3558         BUG_ON(!path);
3559         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
3560         if (!ret) {
3561                 leaf = path->nodes[0];
3562                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3563                 write_extent_buffer(leaf, data, ptr, data_size);
3564                 btrfs_mark_buffer_dirty(leaf);
3565         }
3566         btrfs_free_path(path);
3567         return ret;
3568 }
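
/*
 * A hypothetical sketch of the simplest case: store a small blob with
 * btrfs_insert_item().  The objectid and key type below are arbitrary
 * values chosen for illustration; the block is compiled out.
 */
#if 0
static int example_insert_blob(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
{
        struct btrfs_key key;
        char blob[] = "example payload";

        key.objectid = BTRFS_FIRST_FREE_OBJECTID;       /* arbitrary */
        key.type = BTRFS_XATTR_ITEM_KEY;                /* arbitrary */
        key.offset = 0;

        return btrfs_insert_item(trans, root, &key, blob, sizeof(blob));
}
#endif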
3569
3570 /*
3571  * delete the pointer from a given node.
3572  *
3573  * the tree should have been previously balanced so the deletion does not
3574  * empty a node.
3575  */
3576 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3577                    struct btrfs_path *path, int level, int slot)
3578 {
3579         struct extent_buffer *parent = path->nodes[level];
3580         u32 nritems;
3581         int ret = 0;
3582         int wret;
3583
3584         nritems = btrfs_header_nritems(parent);
3585         if (slot != nritems - 1) {
3586                 memmove_extent_buffer(parent,
3587                               btrfs_node_key_ptr_offset(slot),
3588                               btrfs_node_key_ptr_offset(slot + 1),
3589                               sizeof(struct btrfs_key_ptr) *
3590                               (nritems - slot - 1));
3591         }
3592         nritems--;
3593         btrfs_set_header_nritems(parent, nritems);
3594         if (nritems == 0 && parent == root->node) {
3595                 BUG_ON(btrfs_header_level(root->node) != 1);
3596                 /* just turn the root into a leaf and break */
3597                 btrfs_set_header_level(root->node, 0);
3598         } else if (slot == 0) {
3599                 struct btrfs_disk_key disk_key;
3600
3601                 btrfs_node_key(parent, &disk_key, 0);
3602                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
3603                 if (wret)
3604                         ret = wret;
3605         }
3606         btrfs_mark_buffer_dirty(parent);
3607         return ret;
3608 }
3609
3610 /*
3611  * a helper function to delete the leaf pointed to by path->slots[1] in
3612  * path->nodes[1].  bytenr is the leaf's block pointer, but since the callers
3613  * already know it, it is faster to have them pass it down than to
3614  * read it out of the node again.
3615  *
3616  * This deletes the pointer in path->nodes[1] and frees the leaf
3617  * block extent.  zero is returned if it all worked out, < 0 otherwise.
3618  *
3619  * The path must have already been setup for deleting the leaf, including
3620  * all the proper balancing.  path->nodes[1] must be locked.
3621  */
3622 noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3623                             struct btrfs_root *root,
3624                             struct btrfs_path *path, u64 bytenr)
3625 {
3626         int ret;
3627         u64 root_gen = btrfs_header_generation(path->nodes[1]);
3628         u64 parent_start = path->nodes[1]->start;
3629         u64 parent_owner = btrfs_header_owner(path->nodes[1]);
3630
3631         ret = del_ptr(trans, root, path, 1, path->slots[1]);
3632         if (ret)
3633                 return ret;
3634
3635         /*
3636          * btrfs_free_extent is expensive, we want to make sure we
3637          * aren't holding any locks when we call it
3638          */
3639         btrfs_unlock_up_safe(path, 0);
3640
3641         ret = btrfs_free_extent(trans, root, bytenr,
3642                                 btrfs_level_size(root, 0),
3643                                 parent_start, parent_owner,
3644                                 root_gen, 0, 1);
3645         return ret;
3646 }

3647 /*
3648  * delete the item at the leaf level in path.  If that empties
3649  * the leaf, remove it from the tree
3650  */
3651 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3652                     struct btrfs_path *path, int slot, int nr)
3653 {
3654         struct extent_buffer *leaf;
3655         struct btrfs_item *item;
3656         int last_off;
3657         int dsize = 0;
3658         int ret = 0;
3659         int wret;
3660         int i;
3661         u32 nritems;
3662
3663         leaf = path->nodes[0];
3664         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
3665
3666         for (i = 0; i < nr; i++)
3667                 dsize += btrfs_item_size_nr(leaf, slot + i);
3668
3669         nritems = btrfs_header_nritems(leaf);
3670
3671         if (slot + nr != nritems) {
3672                 int data_end = leaf_data_end(root, leaf);
3673
3674                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3675                               data_end + dsize,
3676                               btrfs_leaf_data(leaf) + data_end,
3677                               last_off - data_end);
3678
3679                 for (i = slot + nr; i < nritems; i++) {
3680                         u32 ioff;
3681
3682                         item = btrfs_item_nr(leaf, i);
3683                         if (!leaf->map_token) {
3684                                 map_extent_buffer(leaf, (unsigned long)item,
3685                                         sizeof(struct btrfs_item),
3686                                         &leaf->map_token, &leaf->kaddr,
3687                                         &leaf->map_start, &leaf->map_len,
3688                                         KM_USER1);
3689                         }
3690                         ioff = btrfs_item_offset(leaf, item);
3691                         btrfs_set_item_offset(leaf, item, ioff + dsize);
3692                 }
3693
3694                 if (leaf->map_token) {
3695                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3696                         leaf->map_token = NULL;
3697                 }
3698
3699                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
3700                               btrfs_item_nr_offset(slot + nr),
3701                               sizeof(struct btrfs_item) *
3702                               (nritems - slot - nr));
3703         }
3704         btrfs_set_header_nritems(leaf, nritems - nr);
3705         nritems -= nr;
3706
3707         /* delete the leaf if we've emptied it */
3708         if (nritems == 0) {
3709                 if (leaf == root->node) {
3710                         btrfs_set_header_level(leaf, 0);
3711                 } else {
3712                         ret = btrfs_del_leaf(trans, root, path, leaf->start);
3713                         BUG_ON(ret);
3714                 }
3715         } else {
3716                 int used = leaf_space_used(leaf, 0, nritems);
3717                 if (slot == 0) {
3718                         struct btrfs_disk_key disk_key;
3719
3720                         btrfs_item_key(leaf, &disk_key, 0);
3721                         wret = fixup_low_keys(trans, root, path,
3722                                               &disk_key, 1);
3723                         if (wret)
3724                                 ret = wret;
3725                 }
3726
3727                 /* delete the leaf if it is mostly empty */
3728                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
3729                         /* push_leaf_left fixes the path.
3730                          * make sure the path still points to our leaf
3731                          * for possible call to del_ptr below
3732                          */
3733                         slot = path->slots[1];
3734                         extent_buffer_get(leaf);
3735
3736                         wret = push_leaf_left(trans, root, path, 1, 1);
3737                         if (wret < 0 && wret != -ENOSPC)
3738                                 ret = wret;
3739
3740                         if (path->nodes[0] == leaf &&
3741                             btrfs_header_nritems(leaf)) {
3742                                 wret = push_leaf_right(trans, root, path, 1, 1);
3743                                 if (wret < 0 && wret != -ENOSPC)
3744                                         ret = wret;
3745                         }
3746
3747                         if (btrfs_header_nritems(leaf) == 0) {
3748                                 path->slots[1] = slot;
3749                                 ret = btrfs_del_leaf(trans, root, path,
3750                                                      leaf->start);
3751                                 BUG_ON(ret);
3752                                 free_extent_buffer(leaf);
3753                         } else {
3754                                 /* if we're still in the path, make sure
3755                                  * we're dirty.  Otherwise, one of the
3756                                  * push_leaf functions must have already
3757                                  * dirtied this buffer
3758                                  */
3759                                 if (path->nodes[0] == leaf)
3760                                         btrfs_mark_buffer_dirty(leaf);
3761                                 free_extent_buffer(leaf);
3762                         }
3763                 } else {
3764                         btrfs_mark_buffer_dirty(leaf);
3765                 }
3766         }
3767         return ret;
3768 }
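
/*
 * A hypothetical sketch of a single-item delete: search with ins_len = -1
 * so the balancing done on the way down prepares the leaf for removal,
 * then call btrfs_del_items() on the slot that was found.  The helper
 * name is illustrative only; the block is compiled out.
 */
#if 0
static int example_delete_one(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct btrfs_key *key)
{
        struct btrfs_path *path;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_search_slot(trans, root, key, path, -1, 1);
        if (ret > 0)
                ret = -ENOENT;
        if (ret)
                goto out;

        ret = btrfs_del_items(trans, root, path, path->slots[0], 1);
out:
        btrfs_free_path(path);
        return ret;
}
#endif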
3769
3770 /*
3771  * search the tree again to find a leaf with lesser keys
3772  * returns 0 if it found something or 1 if there are no lesser leaves.
3773  * returns < 0 on io errors.
3774  *
3775  * This may release the path, and so you may lose any locks held at the
3776  * time you call it.
3777  */
3778 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3779 {
3780         struct btrfs_key key;
3781         struct btrfs_disk_key found_key;
3782         int ret;
3783
3784         btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
3785
3786         if (key.offset > 0)
3787                 key.offset--;
3788         else if (key.type > 0)
3789                 key.type--;
3790         else if (key.objectid > 0)
3791                 key.objectid--;
3792         else
3793                 return 1;
3794
3795         btrfs_release_path(root, path);
3796         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3797         if (ret < 0)
3798                 return ret;
3799         btrfs_item_key(path->nodes[0], &found_key, 0);
3800         ret = comp_keys(&found_key, &key);
3801         if (ret < 0)
3802                 return 0;
3803         return 1;
3804 }
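
/*
 * A hypothetical sketch of stepping one item backwards, falling back to
 * btrfs_prev_leaf() when the current leaf is exhausted; it mirrors the
 * loop in btrfs_previous_item() further down.  The helper name is
 * illustrative only; the block is compiled out.
 */
#if 0
static int example_step_back(struct btrfs_root *root, struct btrfs_path *path)
{
        u32 nritems;
        int ret;

        if (path->slots[0] > 0) {
                path->slots[0]--;
                return 0;
        }

        ret = btrfs_prev_leaf(root, path);
        if (ret)
                return ret;

        nritems = btrfs_header_nritems(path->nodes[0]);
        if (nritems == 0)
                return 1;
        if (path->slots[0] >= nritems)
                path->slots[0] = nritems - 1;
        return 0;
}
#endif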
3805
3806 /*
3807  * A helper function to walk down the tree starting at min_key, and looking
3808  * for nodes or leaves that are either in cache or have a minimum
3809  * transaction id.  This is used by the btree defrag code, and tree logging
3810  *
3811  * This does not cow, but it does stuff the starting key it finds back
3812  * into min_key, so you can call btrfs_search_slot with cow=1 on the
3813  * key and get a writable path.
3814  *
3815  * This does lock as it descends, and path->keep_locks should be set
3816  * to 1 by the caller.
3817  *
3818  * This honors path->lowest_level to prevent descent past a given level
3819  * of the tree.
3820  *
3821  * min_trans indicates the oldest transaction that you are interested
3822  * in walking through.  Any nodes or leaves older than min_trans are
3823  * skipped over (without reading them).
3824  *
3825  * returns zero if something useful was found, < 0 on error and 1 if there
3826  * was nothing in the tree that matched the search criteria.
3827  */
3828 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
3829                          struct btrfs_key *max_key,
3830                          struct btrfs_path *path, int cache_only,
3831                          u64 min_trans)
3832 {
3833         struct extent_buffer *cur;
3834         struct btrfs_key found_key;
3835         int slot;
3836         int sret;
3837         u32 nritems;
3838         int level;
3839         int ret = 1;
3840
3841         WARN_ON(!path->keep_locks);
3842 again:
3843         cur = btrfs_lock_root_node(root);
3844         level = btrfs_header_level(cur);
3845         WARN_ON(path->nodes[level]);
3846         path->nodes[level] = cur;
3847         path->locks[level] = 1;
3848
3849         if (btrfs_header_generation(cur) < min_trans) {
3850                 ret = 1;
3851                 goto out;
3852         }
3853         while (1) {
3854                 nritems = btrfs_header_nritems(cur);
3855                 level = btrfs_header_level(cur);
3856                 sret = bin_search(cur, min_key, level, &slot);
3857
3858                 /* at the lowest level, we're done, setup the path and exit */
3859                 if (level == path->lowest_level) {
3860                         if (slot >= nritems)
3861                                 goto find_next_key;
3862                         ret = 0;
3863                         path->slots[level] = slot;
3864                         btrfs_item_key_to_cpu(cur, &found_key, slot);
3865                         goto out;
3866                 }
3867                 if (sret && slot > 0)
3868                         slot--;
3869                 /*
3870                  * check this node pointer against the cache_only and
3871                  * min_trans parameters.  If it isn't in cache or is too
3872                  * old, skip to the next one.
3873                  */
3874                 while (slot < nritems) {
3875                         u64 blockptr;
3876                         u64 gen;
3877                         struct extent_buffer *tmp;
3878                         struct btrfs_disk_key disk_key;
3879
3880                         blockptr = btrfs_node_blockptr(cur, slot);
3881                         gen = btrfs_node_ptr_generation(cur, slot);
3882                         if (gen < min_trans) {
3883                                 slot++;
3884                                 continue;
3885                         }
3886                         if (!cache_only)
3887                                 break;
3888
3889                         if (max_key) {
3890                                 btrfs_node_key(cur, &disk_key, slot);
3891                                 if (comp_keys(&disk_key, max_key) >= 0) {
3892                                         ret = 1;
3893                                         goto out;
3894                                 }
3895                         }
3896
3897                         tmp = btrfs_find_tree_block(root, blockptr,
3898                                             btrfs_level_size(root, level - 1));
3899
3900                         if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
3901                                 free_extent_buffer(tmp);
3902                                 break;
3903                         }
3904                         if (tmp)
3905                                 free_extent_buffer(tmp);
3906                         slot++;
3907                 }
3908 find_next_key:
3909                 /*
3910                  * we didn't find a candidate key in this node, walk forward
3911                  * and find another one
3912                  */
3913                 if (slot >= nritems) {
3914                         path->slots[level] = slot;
3915                         btrfs_set_path_blocking(path);
3916                         sret = btrfs_find_next_key(root, path, min_key, level,
3917                                                   cache_only, min_trans);
3918                         if (sret == 0) {
3919                                 btrfs_release_path(root, path);
3920                                 goto again;
3921                         } else {
3922                                 btrfs_clear_path_blocking(path);
3923                                 goto out;
3924                         }
3925                 }
3926                 /* save our key for returning back */
3927                 btrfs_node_key_to_cpu(cur, &found_key, slot);
3928                 path->slots[level] = slot;
3929                 if (level == path->lowest_level) {
3930                         ret = 0;
3931                         unlock_up(path, level, 1);
3932                         goto out;
3933                 }
3934                 btrfs_set_path_blocking(path);
3935                 cur = read_node_slot(root, cur, slot);
3936
3937                 btrfs_tree_lock(cur);
3938
3939                 path->locks[level - 1] = 1;
3940                 path->nodes[level - 1] = cur;
3941                 unlock_up(path, level, 1);
3942                 btrfs_clear_path_blocking(path);
3943         }
3944 out:
3945         if (ret == 0)
3946                 memcpy(min_key, &found_key, sizeof(found_key));
3947         btrfs_set_path_blocking(path);
3948         return ret;
3949 }
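
/*
 * A hypothetical sketch of driving btrfs_search_forward() the way the
 * defrag and tree-log code do: keep_locks set, release the path between
 * hits, and bump the key to move past each result.  The key advancing
 * here is simplified (a full walker would also handle type/objectid
 * overflow); the helper name is illustrative and the block is compiled
 * out.
 */
#if 0
static int example_scan_newer_than(struct btrfs_root *root, u64 min_trans)
{
        struct btrfs_path *path;
        struct btrfs_key min_key;
        struct btrfs_key max_key;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        path->keep_locks = 1;

        min_key.objectid = 0;
        min_key.type = 0;
        min_key.offset = 0;

        max_key.objectid = (u64)-1;
        max_key.type = (u8)-1;
        max_key.offset = (u64)-1;

        while (1) {
                ret = btrfs_search_forward(root, &min_key, &max_key, path,
                                           0, min_trans);
                if (ret)
                        break;

                /* min_key now holds the key that was found; use it here */

                btrfs_release_path(root, path);

                if (min_key.offset == (u64)-1)
                        break;
                min_key.offset++;
        }
        btrfs_free_path(path);
        return ret < 0 ? ret : 0;
}
#endif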
3950
3951 /*
3952  * this is similar to btrfs_next_leaf, but does not try to preserve
3953  * and fixup the path.  It looks for and returns the next key in the
3954  * tree based on the current path and the cache_only and min_trans
3955  * parameters.
3956  *
3957  * 0 is returned if another key is found, < 0 if there are any errors
3958  * and 1 is returned if there are no higher keys in the tree
3959  *
3960  * path->keep_locks should be set to 1 on the search made before
3961  * calling this function.
3962  */
3963 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
3964                         struct btrfs_key *key, int lowest_level,
3965                         int cache_only, u64 min_trans)
3966 {
3967         int level = lowest_level;
3968         int slot;
3969         struct extent_buffer *c;
3970
3971         WARN_ON(!path->keep_locks);
3972         while (level < BTRFS_MAX_LEVEL) {
3973                 if (!path->nodes[level])
3974                         return 1;
3975
3976                 slot = path->slots[level] + 1;
3977                 c = path->nodes[level];
3978 next:
3979                 if (slot >= btrfs_header_nritems(c)) {
3980                         level++;
3981                         if (level == BTRFS_MAX_LEVEL)
3982                                 return 1;
3983                         continue;
3984                 }
3985                 if (level == 0)
3986                         btrfs_item_key_to_cpu(c, key, slot);
3987                 else {
3988                         u64 blockptr = btrfs_node_blockptr(c, slot);
3989                         u64 gen = btrfs_node_ptr_generation(c, slot);
3990
3991                         if (cache_only) {
3992                                 struct extent_buffer *cur;
3993                                 cur = btrfs_find_tree_block(root, blockptr,
3994                                             btrfs_level_size(root, level - 1));
3995                                 if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
3996                                         slot++;
3997                                         if (cur)
3998                                                 free_extent_buffer(cur);
3999                                         goto next;
4000                                 }
4001                                 free_extent_buffer(cur);
4002                         }
4003                         if (gen < min_trans) {
4004                                 slot++;
4005                                 goto next;
4006                         }
4007                         btrfs_node_key_to_cpu(c, key, slot);
4008                 }
4009                 return 0;
4010         }
4011         return 1;
4012 }
4013
4014 /*
4015  * search the tree again to find a leaf with greater keys
4016  * returns 0 if it found something or 1 if there are no greater leaves.
4017  * returns < 0 on io errors.
4018  */
4019 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
4020 {
4021         int slot;
4022         int level = 1;
4023         struct extent_buffer *c;
4024         struct extent_buffer *next = NULL;
4025         struct btrfs_key key;
4026         u32 nritems;
4027         int ret;
4028
4029         nritems = btrfs_header_nritems(path->nodes[0]);
4030         if (nritems == 0)
4031                 return 1;
4032
4033         btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
4034
4035         btrfs_release_path(root, path);
4036         path->keep_locks = 1;
4037         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4038         path->keep_locks = 0;
4039
4040         if (ret < 0)
4041                 return ret;
4042
4043         btrfs_set_path_blocking(path);
4044         nritems = btrfs_header_nritems(path->nodes[0]);
4045         /*
4046          * by releasing the path above we dropped all our locks.  A balance
4047          * could have added more items next to the key that used to be
4048          * at the very end of the block.  So, check again here and
4049          * advance the path if there are now more items available.
4050          */
4051         if (nritems > 0 && path->slots[0] < nritems - 1) {
4052                 path->slots[0]++;
4053                 goto done;
4054         }
4055
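        /* walk up the tree until some level has a slot to the right of our path */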
4056         while (level < BTRFS_MAX_LEVEL) {
4057                 if (!path->nodes[level])
4058                         return 1;
4059
4060                 slot = path->slots[level] + 1;
4061                 c = path->nodes[level];
4062                 if (slot >= btrfs_header_nritems(c)) {
4063                         level++;
4064                         if (level == BTRFS_MAX_LEVEL)
4065                                 return 1;
4066                         continue;
4067                 }
4068
4069                 if (next) {
4070                         btrfs_tree_unlock(next);
4071                         free_extent_buffer(next);
4072                 }
4073
4074                 /* the path was set to blocking above */
4075                 if (level == 1 && (path->locks[1] || path->skip_locking) &&
4076                     path->reada)
4077                         reada_for_search(root, path, level, slot, 0);
4078
4079                 next = read_node_slot(root, c, slot);
4080                 if (!path->skip_locking) {
4081                         WARN_ON(!btrfs_tree_locked(c));
4082                         btrfs_tree_lock(next);
4083                         btrfs_set_lock_blocking(next);
4084                 }
4085                 break;
4086         }
4087         path->slots[level] = slot;
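        /* walk back down to level 0, always taking slot 0 of each new child */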
4088         while (1) {
4089                 level--;
4090                 c = path->nodes[level];
4091                 if (path->locks[level])
4092                         btrfs_tree_unlock(c);
4093                 free_extent_buffer(c);
4094                 path->nodes[level] = next;
4095                 path->slots[level] = 0;
4096                 if (!path->skip_locking)
4097                         path->locks[level] = 1;
4098                 if (!level)
4099                         break;
4100
4101                 btrfs_set_path_blocking(path);
4102                 if (level == 1 && path->locks[1] && path->reada)
4103                         reada_for_search(root, path, level, slot, 0);
4104                 next = read_node_slot(root, next, 0);
4105                 if (!path->skip_locking) {
4106                         WARN_ON(!btrfs_tree_locked(path->nodes[level]));
4107                         btrfs_tree_lock(next);
4108                         btrfs_set_lock_blocking(next);
4109                 }
4110         }
4111 done:
4112         unlock_up(path, 0, 1);
4113         return 0;
4114 }
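
/*
 * Illustrative sketch, not part of the original file: the usual pattern
 * for iterating every item in a tree with btrfs_next_leaf() above.  The
 * helper name and the all-zero starting key are assumptions made for
 * this example.
 */
static int example_walk_all_items(struct btrfs_root *root)
{
        struct btrfs_path *path;
        struct btrfs_key key = {0};
        struct extent_buffer *leaf;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto out;

        while (1) {
                leaf = path->nodes[0];
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        /* 0: moved to the next leaf, 1: no more leaves, < 0: error */
                        ret = btrfs_next_leaf(root, path);
                        if (ret)
                                break;
                        continue;
                }
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                /* process the item at path->slots[0] here */
                path->slots[0]++;
        }
        if (ret == 1)
                ret = 0;
out:
        btrfs_free_path(path);
        return ret;
}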
4115
4116 /*
4117  * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
4118  * searching until it gets past min_objectid or finds an item of 'type'
4119  *
4120  * returns 0 if something was found, 1 if nothing was found, and < 0 on error
4121  */
4122 int btrfs_previous_item(struct btrfs_root *root,
4123                         struct btrfs_path *path, u64 min_objectid,
4124                         int type)
4125 {
4126         struct btrfs_key found_key;
4127         struct extent_buffer *leaf;
4128         u32 nritems;
4129         int ret;
4130
4131         while (1) {
4132                 if (path->slots[0] == 0) {
4133                         btrfs_set_path_blocking(path);
4134                         ret = btrfs_prev_leaf(root, path);
4135                         if (ret != 0)
4136                                 return ret;
4137                 } else {
4138                         path->slots[0]--;
4139                 }
4140                 leaf = path->nodes[0];
4141                 nritems = btrfs_header_nritems(leaf);
4142                 if (nritems == 0)
4143                         return 1;
4144                 if (path->slots[0] == nritems)
4145                         path->slots[0]--;
4146
4147                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4148                 if (found_key.type == type)
4149                         return 0;
4150                 if (found_key.objectid < min_objectid)
4151                         break;
4152                 if (found_key.objectid == min_objectid &&
4153                     found_key.type < type)
4154                         break;
4155         }
4156         return 1;
4157 }
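
/*
 * Illustrative sketch, not part of the original file: walking backwards
 * from a search position with btrfs_previous_item() above to find an
 * item of a given type.  The helper name is an assumption, and
 * BTRFS_INODE_ITEM_KEY is used purely as an example type.
 */
static int example_find_prev_item(struct btrfs_root *root,
                                  struct btrfs_key *search_key,
                                  struct btrfs_key *found_key)
{
        struct btrfs_path *path;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_search_slot(NULL, root, search_key, path, 0, 0);
        if (ret < 0)
                goto out;

        /* 0: an item of the requested type was found, 1: nothing found */
        ret = btrfs_previous_item(root, path, search_key->objectid,
                                  BTRFS_INODE_ITEM_KEY);
        if (ret == 0)
                btrfs_item_key_to_cpu(path->nodes[0], found_key,
                                      path->slots[0]);
out:
        btrfs_free_path(path);
        return ret;
}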