Btrfs: Verify checksums on tree blocks found without read_tree_block
[sfrench/cifs-2.6.git] / fs / btrfs / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24
25 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
26                       *root, struct btrfs_path *path, int level);
27 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
28                       *root, struct btrfs_key *ins_key,
29                       struct btrfs_path *path, int data_size, int extend);
30 static int push_node_left(struct btrfs_trans_handle *trans,
31                           struct btrfs_root *root, struct extent_buffer *dst,
32                           struct extent_buffer *src);
33 static int balance_node_right(struct btrfs_trans_handle *trans,
34                               struct btrfs_root *root,
35                               struct extent_buffer *dst_buf,
36                               struct extent_buffer *src_buf);
37 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
38                    struct btrfs_path *path, int level, int slot);
39
40 inline void btrfs_init_path(struct btrfs_path *p)
41 {
42         memset(p, 0, sizeof(*p));
43 }
44
45 struct btrfs_path *btrfs_alloc_path(void)
46 {
47         struct btrfs_path *path;
48         path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
49         if (path) {
50                 btrfs_init_path(path);
51                 path->reada = 1;
52         }
53         return path;
54 }
55
56 void btrfs_free_path(struct btrfs_path *p)
57 {
58         btrfs_release_path(NULL, p);
59         kmem_cache_free(btrfs_path_cachep, p);
60 }
61
62 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 {
64         int i;
65         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66                 if (!p->nodes[i])
67                         break;
68                 free_extent_buffer(p->nodes[i]);
69         }
70         memset(p, 0, sizeof(*p));
71 }
72
73 static void add_root_to_dirty_list(struct btrfs_root *root)
74 {
75         if (root->track_dirty && list_empty(&root->dirty_list)) {
76                 list_add(&root->dirty_list,
77                          &root->fs_info->dirty_cowonly_roots);
78         }
79 }
80
81 int btrfs_copy_root(struct btrfs_trans_handle *trans,
82                       struct btrfs_root *root,
83                       struct extent_buffer *buf,
84                       struct extent_buffer **cow_ret, u64 new_root_objectid)
85 {
86         struct extent_buffer *cow;
87         u32 nritems;
88         int ret = 0;
89         int level;
90         struct btrfs_key first_key;
91         struct btrfs_root *new_root;
92
93         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
94         if (!new_root)
95                 return -ENOMEM;
96
97         memcpy(new_root, root, sizeof(*new_root));
98         new_root->root_key.objectid = new_root_objectid;
99
100         WARN_ON(root->ref_cows && trans->transid !=
101                 root->fs_info->running_transaction->transid);
102         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
103
104         level = btrfs_header_level(buf);
105         nritems = btrfs_header_nritems(buf);
106         if (nritems) {
107                 if (level == 0)
108                         btrfs_item_key_to_cpu(buf, &first_key, 0);
109                 else
110                         btrfs_node_key_to_cpu(buf, &first_key, 0);
111         } else {
112                 first_key.objectid = 0;
113         }
114         cow = __btrfs_alloc_free_block(trans, new_root, buf->len,
115                                        new_root_objectid,
116                                        trans->transid, first_key.objectid,
117                                        level, buf->start, 0);
118         if (IS_ERR(cow)) {
119                 kfree(new_root);
120                 return PTR_ERR(cow);
121         }
122
123         copy_extent_buffer(cow, buf, 0, 0, cow->len);
124         btrfs_set_header_bytenr(cow, cow->start);
125         btrfs_set_header_generation(cow, trans->transid);
126         btrfs_set_header_owner(cow, new_root_objectid);
127         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
128
129         WARN_ON(btrfs_header_generation(buf) > trans->transid);
130         ret = btrfs_inc_ref(trans, new_root, buf);
131         kfree(new_root);
132
133         if (ret)
134                 return ret;
135
136         btrfs_mark_buffer_dirty(cow);
137         *cow_ret = cow;
138         return 0;
139 }
140
141 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
142                              struct btrfs_root *root,
143                              struct extent_buffer *buf,
144                              struct extent_buffer *parent, int parent_slot,
145                              struct extent_buffer **cow_ret,
146                              u64 search_start, u64 empty_size)
147 {
148         u64 root_gen;
149         struct extent_buffer *cow;
150         u32 nritems;
151         int ret = 0;
152         int different_trans = 0;
153         int level;
154         struct btrfs_key first_key;
155
156         if (root->ref_cows) {
157                 root_gen = trans->transid;
158         } else {
159                 root_gen = 0;
160         }
161         if (!(buf->flags & EXTENT_CSUM))
162                 WARN_ON(1);
163
164         WARN_ON(root->ref_cows && trans->transid !=
165                 root->fs_info->running_transaction->transid);
166         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
167
168         level = btrfs_header_level(buf);
169         nritems = btrfs_header_nritems(buf);
170         if (nritems) {
171                 if (level == 0)
172                         btrfs_item_key_to_cpu(buf, &first_key, 0);
173                 else
174                         btrfs_node_key_to_cpu(buf, &first_key, 0);
175         } else {
176                 first_key.objectid = 0;
177         }
178         cow = __btrfs_alloc_free_block(trans, root, buf->len,
179                                      root->root_key.objectid,
180                                      root_gen, first_key.objectid, level,
181                                      search_start, empty_size);
182         if (IS_ERR(cow))
183                 return PTR_ERR(cow);
184
185         copy_extent_buffer(cow, buf, 0, 0, cow->len);
186         btrfs_set_header_bytenr(cow, cow->start);
187         btrfs_set_header_generation(cow, trans->transid);
188         btrfs_set_header_owner(cow, root->root_key.objectid);
189         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
190
191         WARN_ON(btrfs_header_generation(buf) > trans->transid);
192         if (btrfs_header_generation(buf) != trans->transid) {
193                 different_trans = 1;
194                 ret = btrfs_inc_ref(trans, root, buf);
195                 if (ret)
196                         return ret;
197         } else {
198                 clean_tree_block(trans, root, buf);
199         }
200
201         if (buf == root->node) {
202                 root_gen = btrfs_header_generation(buf);
203                 root->node = cow;
204                 extent_buffer_get(cow);
205                 if (buf != root->commit_root) {
206                         btrfs_free_extent(trans, root, buf->start,
207                                           buf->len, root->root_key.objectid,
208                                           root_gen, 0, 0, 1);
209                 }
210                 free_extent_buffer(buf);
211                 add_root_to_dirty_list(root);
212         } else {
213                 root_gen = btrfs_header_generation(parent);
214                 btrfs_set_node_blockptr(parent, parent_slot,
215                                         cow->start);
216                 WARN_ON(trans->transid == 0);
217                 btrfs_set_node_ptr_generation(parent, parent_slot,
218                                               trans->transid);
219                 btrfs_mark_buffer_dirty(parent);
220                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
221                 btrfs_free_extent(trans, root, buf->start, buf->len,
222                                   btrfs_header_owner(parent), root_gen,
223                                   0, 0, 1);
224         }
225         free_extent_buffer(buf);
226         btrfs_mark_buffer_dirty(cow);
227         *cow_ret = cow;
228         return 0;
229 }
230
231 int btrfs_cow_block(struct btrfs_trans_handle *trans,
232                     struct btrfs_root *root, struct extent_buffer *buf,
233                     struct extent_buffer *parent, int parent_slot,
234                     struct extent_buffer **cow_ret)
235 {
236         u64 search_start;
237         u64 header_trans;
238         int ret;
239
240         if (trans->transaction != root->fs_info->running_transaction) {
241                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
242                        root->fs_info->running_transaction->transid);
243                 WARN_ON(1);
244         }
245         if (trans->transid != root->fs_info->generation) {
246                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
247                        root->fs_info->generation);
248                 WARN_ON(1);
249         }
250         if (!(buf->flags & EXTENT_CSUM))
251                 WARN_ON(1);
252
253         header_trans = btrfs_header_generation(buf);
254         spin_lock(&root->fs_info->hash_lock);
255         if (header_trans == trans->transid &&
256             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
257                 *cow_ret = buf;
258                 spin_unlock(&root->fs_info->hash_lock);
259                 return 0;
260         }
261         spin_unlock(&root->fs_info->hash_lock);
262         search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
263         ret = __btrfs_cow_block(trans, root, buf, parent,
264                                  parent_slot, cow_ret, search_start, 0);
265         return ret;
266 }
267
/*
 * Return 1 when the gap between the end of the lower block and the start
 * of the higher block is under 32768 bytes, 0 otherwise.  Two identical
 * block numbers are never reported as close.
 */
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
{
	u64 lower;
	u64 upper;

	if (blocknr == other)
		return 0;

	lower = blocknr < other ? blocknr : other;
	upper = blocknr < other ? other : blocknr;

	/* unsigned arithmetic, exactly as in a direct comparison */
	return (upper - (lower + blocksize) < 32768) ? 1 : 0;
}
276
277 /*
278  * compare two keys in a memcmp fashion
279  */
280 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
281 {
282         struct btrfs_key k1;
283
284         btrfs_disk_key_to_cpu(&k1, disk);
285
286         if (k1.objectid > k2->objectid)
287                 return 1;
288         if (k1.objectid < k2->objectid)
289                 return -1;
290         if (k1.type > k2->type)
291                 return 1;
292         if (k1.type < k2->type)
293                 return -1;
294         if (k1.offset > k2->offset)
295                 return 1;
296         if (k1.offset < k2->offset)
297                 return -1;
298         return 0;
299 }
300
301
302 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
303                        struct btrfs_root *root, struct extent_buffer *parent,
304                        int start_slot, int cache_only, u64 *last_ret,
305                        struct btrfs_key *progress)
306 {
307         struct extent_buffer *cur;
308         struct extent_buffer *tmp;
309         u64 blocknr;
310         u64 search_start = *last_ret;
311         u64 last_block = 0;
312         u64 other;
313         u32 parent_nritems;
314         int end_slot;
315         int i;
316         int err = 0;
317         int parent_level;
318         int uptodate;
319         u32 blocksize;
320         int progress_passed = 0;
321         struct btrfs_disk_key disk_key;
322
323         parent_level = btrfs_header_level(parent);
324         if (cache_only && parent_level != 1)
325                 return 0;
326
327         if (trans->transaction != root->fs_info->running_transaction) {
328                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
329                        root->fs_info->running_transaction->transid);
330                 WARN_ON(1);
331         }
332         if (trans->transid != root->fs_info->generation) {
333                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
334                        root->fs_info->generation);
335                 WARN_ON(1);
336         }
337
338         parent_nritems = btrfs_header_nritems(parent);
339         blocksize = btrfs_level_size(root, parent_level - 1);
340         end_slot = parent_nritems;
341
342         if (parent_nritems == 1)
343                 return 0;
344
345         for (i = start_slot; i < end_slot; i++) {
346                 int close = 1;
347
348                 if (!parent->map_token) {
349                         map_extent_buffer(parent,
350                                         btrfs_node_key_ptr_offset(i),
351                                         sizeof(struct btrfs_key_ptr),
352                                         &parent->map_token, &parent->kaddr,
353                                         &parent->map_start, &parent->map_len,
354                                         KM_USER1);
355                 }
356                 btrfs_node_key(parent, &disk_key, i);
357                 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
358                         continue;
359
360                 progress_passed = 1;
361                 blocknr = btrfs_node_blockptr(parent, i);
362                 if (last_block == 0)
363                         last_block = blocknr;
364
365                 if (i > 0) {
366                         other = btrfs_node_blockptr(parent, i - 1);
367                         close = close_blocks(blocknr, other, blocksize);
368                 }
369                 if (close && i < end_slot - 2) {
370                         other = btrfs_node_blockptr(parent, i + 1);
371                         close = close_blocks(blocknr, other, blocksize);
372                 }
373                 if (close) {
374                         last_block = blocknr;
375                         continue;
376                 }
377                 if (parent->map_token) {
378                         unmap_extent_buffer(parent, parent->map_token,
379                                             KM_USER1);
380                         parent->map_token = NULL;
381                 }
382
383                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
384                 if (cur)
385                         uptodate = btrfs_buffer_uptodate(cur);
386                 else
387                         uptodate = 0;
388                 if (!cur || !uptodate) {
389                         if (cache_only) {
390                                 free_extent_buffer(cur);
391                                 continue;
392                         }
393                         if (!cur) {
394                                 cur = read_tree_block(root, blocknr,
395                                                          blocksize);
396                         } else if (!uptodate) {
397                                 btrfs_read_buffer(cur);
398                         }
399                 }
400                 if (search_start == 0)
401                         search_start = last_block;
402
403                 btrfs_verify_block_csum(root, cur);
404                 err = __btrfs_cow_block(trans, root, cur, parent, i,
405                                         &tmp, search_start,
406                                         min(16 * blocksize,
407                                             (end_slot - i) * blocksize));
408                 if (err) {
409                         free_extent_buffer(cur);
410                         break;
411                 }
412                 search_start = tmp->start;
413                 last_block = tmp->start;
414                 *last_ret = search_start;
415                 if (parent_level == 1)
416                         btrfs_clear_buffer_defrag(tmp);
417                 free_extent_buffer(tmp);
418         }
419         if (parent->map_token) {
420                 unmap_extent_buffer(parent, parent->map_token,
421                                     KM_USER1);
422                 parent->map_token = NULL;
423         }
424         return err;
425 }
426
427 /*
428  * The leaf data grows from end-to-front in the node.
429  * this returns the address of the start of the last item,
430  * which is the stop of the leaf data stack
431  */
432 static inline unsigned int leaf_data_end(struct btrfs_root *root,
433                                          struct extent_buffer *leaf)
434 {
435         u32 nr = btrfs_header_nritems(leaf);
436         if (nr == 0)
437                 return BTRFS_LEAF_DATA_SIZE(root);
438         return btrfs_item_offset_nr(leaf, nr - 1);
439 }
440
441 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
442                       int level)
443 {
444         struct extent_buffer *parent = NULL;
445         struct extent_buffer *node = path->nodes[level];
446         struct btrfs_disk_key parent_key;
447         struct btrfs_disk_key node_key;
448         int parent_slot;
449         int slot;
450         struct btrfs_key cpukey;
451         u32 nritems = btrfs_header_nritems(node);
452
453         if (path->nodes[level + 1])
454                 parent = path->nodes[level + 1];
455
456         slot = path->slots[level];
457         BUG_ON(nritems == 0);
458         if (parent) {
459                 parent_slot = path->slots[level + 1];
460                 btrfs_node_key(parent, &parent_key, parent_slot);
461                 btrfs_node_key(node, &node_key, 0);
462                 BUG_ON(memcmp(&parent_key, &node_key,
463                               sizeof(struct btrfs_disk_key)));
464                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
465                        btrfs_header_bytenr(node));
466         }
467         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
468         if (slot != 0) {
469                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
470                 btrfs_node_key(node, &node_key, slot);
471                 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
472         }
473         if (slot < nritems - 1) {
474                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
475                 btrfs_node_key(node, &node_key, slot);
476                 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
477         }
478         return 0;
479 }
480
481 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
482                       int level)
483 {
484         struct extent_buffer *leaf = path->nodes[level];
485         struct extent_buffer *parent = NULL;
486         int parent_slot;
487         struct btrfs_key cpukey;
488         struct btrfs_disk_key parent_key;
489         struct btrfs_disk_key leaf_key;
490         int slot = path->slots[0];
491
492         u32 nritems = btrfs_header_nritems(leaf);
493
494         if (path->nodes[level + 1])
495                 parent = path->nodes[level + 1];
496
497         if (nritems == 0)
498                 return 0;
499
500         if (parent) {
501                 parent_slot = path->slots[level + 1];
502                 btrfs_node_key(parent, &parent_key, parent_slot);
503                 btrfs_item_key(leaf, &leaf_key, 0);
504
505                 BUG_ON(memcmp(&parent_key, &leaf_key,
506                        sizeof(struct btrfs_disk_key)));
507                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
508                        btrfs_header_bytenr(leaf));
509         }
510 #if 0
511         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
512                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
513                 btrfs_item_key(leaf, &leaf_key, i);
514                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
515                         btrfs_print_leaf(root, leaf);
516                         printk("slot %d offset bad key\n", i);
517                         BUG_ON(1);
518                 }
519                 if (btrfs_item_offset_nr(leaf, i) !=
520                         btrfs_item_end_nr(leaf, i + 1)) {
521                         btrfs_print_leaf(root, leaf);
522                         printk("slot %d offset bad\n", i);
523                         BUG_ON(1);
524                 }
525                 if (i == 0) {
526                         if (btrfs_item_offset_nr(leaf, i) +
527                                btrfs_item_size_nr(leaf, i) !=
528                                BTRFS_LEAF_DATA_SIZE(root)) {
529                                 btrfs_print_leaf(root, leaf);
530                                 printk("slot %d first offset bad\n", i);
531                                 BUG_ON(1);
532                         }
533                 }
534         }
535         if (nritems > 0) {
536                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
537                                 btrfs_print_leaf(root, leaf);
538                                 printk("slot %d bad size \n", nritems - 1);
539                                 BUG_ON(1);
540                 }
541         }
542 #endif
543         if (slot != 0 && slot < nritems - 1) {
544                 btrfs_item_key(leaf, &leaf_key, slot);
545                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
546                 if (comp_keys(&leaf_key, &cpukey) <= 0) {
547                         btrfs_print_leaf(root, leaf);
548                         printk("slot %d offset bad key\n", slot);
549                         BUG_ON(1);
550                 }
551                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
552                        btrfs_item_end_nr(leaf, slot)) {
553                         btrfs_print_leaf(root, leaf);
554                         printk("slot %d offset bad\n", slot);
555                         BUG_ON(1);
556                 }
557         }
558         if (slot < nritems - 1) {
559                 btrfs_item_key(leaf, &leaf_key, slot);
560                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
561                 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
562                 if (btrfs_item_offset_nr(leaf, slot) !=
563                         btrfs_item_end_nr(leaf, slot + 1)) {
564                         btrfs_print_leaf(root, leaf);
565                         printk("slot %d offset bad\n", slot);
566                         BUG_ON(1);
567                 }
568         }
569         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
570                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
571         return 0;
572 }
573
574 static int noinline check_block(struct btrfs_root *root,
575                                 struct btrfs_path *path, int level)
576 {
577         return 0;
578 #if 0
579         struct extent_buffer *buf = path->nodes[level];
580
581         if (memcmp_extent_buffer(buf, root->fs_info->fsid,
582                                  (unsigned long)btrfs_header_fsid(buf),
583                                  BTRFS_FSID_SIZE)) {
584                 printk("warning bad block %Lu\n", buf->start);
585                 return 1;
586         }
587 #endif
588         if (level == 0)
589                 return check_leaf(root, path, level);
590         return check_node(root, path, level);
591 }
592
593 /*
594  * search for key in the extent_buffer.  The items start at offset p,
595  * and they are item_size apart.  There are 'max' items in p.
596  *
597  * the slot in the array is returned via slot, and it points to
598  * the place where you would insert key if it is not found in
599  * the array.
600  *
601  * slot may point to max if the key is bigger than all of the keys
602  */
603 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
604                               int item_size, struct btrfs_key *key,
605                               int max, int *slot)
606 {
607         int low = 0;
608         int high = max;
609         int mid;
610         int ret;
611         struct btrfs_disk_key *tmp = NULL;
612         struct btrfs_disk_key unaligned;
613         unsigned long offset;
614         char *map_token = NULL;
615         char *kaddr = NULL;
616         unsigned long map_start = 0;
617         unsigned long map_len = 0;
618         int err;
619
620         while(low < high) {
621                 mid = (low + high) / 2;
622                 offset = p + mid * item_size;
623
624                 if (!map_token || offset < map_start ||
625                     (offset + sizeof(struct btrfs_disk_key)) >
626                     map_start + map_len) {
627                         if (map_token) {
628                                 unmap_extent_buffer(eb, map_token, KM_USER0);
629                                 map_token = NULL;
630                         }
631                         err = map_extent_buffer(eb, offset,
632                                                 sizeof(struct btrfs_disk_key),
633                                                 &map_token, &kaddr,
634                                                 &map_start, &map_len, KM_USER0);
635
636                         if (!err) {
637                                 tmp = (struct btrfs_disk_key *)(kaddr + offset -
638                                                         map_start);
639                         } else {
640                                 read_extent_buffer(eb, &unaligned,
641                                                    offset, sizeof(unaligned));
642                                 tmp = &unaligned;
643                         }
644
645                 } else {
646                         tmp = (struct btrfs_disk_key *)(kaddr + offset -
647                                                         map_start);
648                 }
649                 ret = comp_keys(tmp, key);
650
651                 if (ret < 0)
652                         low = mid + 1;
653                 else if (ret > 0)
654                         high = mid;
655                 else {
656                         *slot = mid;
657                         if (map_token)
658                                 unmap_extent_buffer(eb, map_token, KM_USER0);
659                         return 0;
660                 }
661         }
662         *slot = low;
663         if (map_token)
664                 unmap_extent_buffer(eb, map_token, KM_USER0);
665         return 1;
666 }
667
668 /*
669  * simple bin_search frontend that does the right thing for
670  * leaves vs nodes
671  */
672 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
673                       int level, int *slot)
674 {
675         if (level == 0) {
676                 return generic_bin_search(eb,
677                                           offsetof(struct btrfs_leaf, items),
678                                           sizeof(struct btrfs_item),
679                                           key, btrfs_header_nritems(eb),
680                                           slot);
681         } else {
682                 return generic_bin_search(eb,
683                                           offsetof(struct btrfs_node, ptrs),
684                                           sizeof(struct btrfs_key_ptr),
685                                           key, btrfs_header_nritems(eb),
686                                           slot);
687         }
688         return -1;
689 }
690
/*
 * Read the child block that 'parent' points to at 'slot'.
 *
 * Returns NULL without reading anything when the slot is negative or
 * not below the parent's item count; otherwise returns whatever
 * read_tree_block() yields for the child, sized for one level below the
 * parent.
 */
static struct extent_buffer *read_node_slot(struct btrfs_root *root,
				   struct extent_buffer *parent, int slot)
{
	int child_level;

	if (slot < 0 || slot >= btrfs_header_nritems(parent))
		return NULL;

	child_level = btrfs_header_level(parent) - 1;
	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
			       btrfs_level_size(root, child_level));
}
701
702 static int balance_level(struct btrfs_trans_handle *trans,
703                          struct btrfs_root *root,
704                          struct btrfs_path *path, int level)
705 {
706         struct extent_buffer *right = NULL;
707         struct extent_buffer *mid;
708         struct extent_buffer *left = NULL;
709         struct extent_buffer *parent = NULL;
710         int ret = 0;
711         int wret;
712         int pslot;
713         int orig_slot = path->slots[level];
714         int err_on_enospc = 0;
715         u64 orig_ptr;
716
717         if (level == 0)
718                 return 0;
719
720         mid = path->nodes[level];
721         WARN_ON(btrfs_header_generation(mid) != trans->transid);
722
723         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
724
725         if (level < BTRFS_MAX_LEVEL - 1)
726                 parent = path->nodes[level + 1];
727         pslot = path->slots[level + 1];
728
729         /*
730          * deal with the case where there is only one pointer in the root
731          * by promoting the node below to a root
732          */
733         if (!parent) {
734                 struct extent_buffer *child;
735
736                 if (btrfs_header_nritems(mid) != 1)
737                         return 0;
738
739                 /* promote the child to a root */
740                 child = read_node_slot(root, mid, 0);
741                 BUG_ON(!child);
742                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
743                 BUG_ON(ret);
744
745                 root->node = child;
746                 add_root_to_dirty_list(root);
747                 path->nodes[level] = NULL;
748                 clean_tree_block(trans, root, mid);
749                 wait_on_tree_block_writeback(root, mid);
750                 /* once for the path */
751                 free_extent_buffer(mid);
752                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
753                                         root->root_key.objectid,
754                                         btrfs_header_generation(mid), 0, 0, 1);
755                 /* once for the root ptr */
756                 free_extent_buffer(mid);
757                 return ret;
758         }
759         if (btrfs_header_nritems(mid) >
760             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
761                 return 0;
762
763         if (btrfs_header_nritems(mid) < 2)
764                 err_on_enospc = 1;
765
766         left = read_node_slot(root, parent, pslot - 1);
767         if (left) {
768                 wret = btrfs_cow_block(trans, root, left,
769                                        parent, pslot - 1, &left);
770                 if (wret) {
771                         ret = wret;
772                         goto enospc;
773                 }
774         }
775         right = read_node_slot(root, parent, pslot + 1);
776         if (right) {
777                 wret = btrfs_cow_block(trans, root, right,
778                                        parent, pslot + 1, &right);
779                 if (wret) {
780                         ret = wret;
781                         goto enospc;
782                 }
783         }
784
785         /* first, try to make some room in the middle buffer */
786         if (left) {
787                 orig_slot += btrfs_header_nritems(left);
788                 wret = push_node_left(trans, root, left, mid);
789                 if (wret < 0)
790                         ret = wret;
791                 if (btrfs_header_nritems(mid) < 2)
792                         err_on_enospc = 1;
793         }
794
795         /*
796          * then try to empty the right most buffer into the middle
797          */
798         if (right) {
799                 wret = push_node_left(trans, root, mid, right);
800                 if (wret < 0 && wret != -ENOSPC)
801                         ret = wret;
802                 if (btrfs_header_nritems(right) == 0) {
803                         u64 bytenr = right->start;
804                         u64 generation = btrfs_header_generation(parent);
805                         u32 blocksize = right->len;
806
807                         clean_tree_block(trans, root, right);
808                         wait_on_tree_block_writeback(root, right);
809                         free_extent_buffer(right);
810                         right = NULL;
811                         wret = del_ptr(trans, root, path, level + 1, pslot +
812                                        1);
813                         if (wret)
814                                 ret = wret;
815                         wret = btrfs_free_extent(trans, root, bytenr,
816                                                  blocksize,
817                                                  btrfs_header_owner(parent),
818                                                  generation, 0, 0, 1);
819                         if (wret)
820                                 ret = wret;
821                 } else {
822                         struct btrfs_disk_key right_key;
823                         btrfs_node_key(right, &right_key, 0);
824                         btrfs_set_node_key(parent, &right_key, pslot + 1);
825                         btrfs_mark_buffer_dirty(parent);
826                 }
827         }
828         if (btrfs_header_nritems(mid) == 1) {
829                 /*
830                  * we're not allowed to leave a node with one item in the
831                  * tree during a delete.  A deletion from lower in the tree
832                  * could try to delete the only pointer in this node.
833                  * So, pull some keys from the left.
834                  * There has to be a left pointer at this point because
835                  * otherwise we would have pulled some pointers from the
836                  * right
837                  */
838                 BUG_ON(!left);
839                 wret = balance_node_right(trans, root, mid, left);
840                 if (wret < 0) {
841                         ret = wret;
842                         goto enospc;
843                 }
844                 BUG_ON(wret == 1);
845         }
846         if (btrfs_header_nritems(mid) == 0) {
847                 /* we've managed to empty the middle node, drop it */
848                 u64 root_gen = btrfs_header_generation(parent);
849                 u64 bytenr = mid->start;
850                 u32 blocksize = mid->len;
851                 clean_tree_block(trans, root, mid);
852                 wait_on_tree_block_writeback(root, mid);
853                 free_extent_buffer(mid);
854                 mid = NULL;
855                 wret = del_ptr(trans, root, path, level + 1, pslot);
856                 if (wret)
857                         ret = wret;
858                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
859                                          btrfs_header_owner(parent),
860                                          root_gen, 0, 0, 1);
861                 if (wret)
862                         ret = wret;
863         } else {
864                 /* update the parent key to reflect our changes */
865                 struct btrfs_disk_key mid_key;
866                 btrfs_node_key(mid, &mid_key, 0);
867                 btrfs_set_node_key(parent, &mid_key, pslot);
868                 btrfs_mark_buffer_dirty(parent);
869         }
870
871         /* update the path */
872         if (left) {
873                 if (btrfs_header_nritems(left) > orig_slot) {
874                         extent_buffer_get(left);
875                         path->nodes[level] = left;
876                         path->slots[level + 1] -= 1;
877                         path->slots[level] = orig_slot;
878                         if (mid)
879                                 free_extent_buffer(mid);
880                 } else {
881                         orig_slot -= btrfs_header_nritems(left);
882                         path->slots[level] = orig_slot;
883                 }
884         }
885         /* double check we haven't messed things up */
886         check_block(root, path, level);
887         if (orig_ptr !=
888             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
889                 BUG();
890 enospc:
891         if (right)
892                 free_extent_buffer(right);
893         if (left)
894                 free_extent_buffer(left);
895         return ret;
896 }
897
898 /* returns zero if the push worked, non-zero otherwise */
899 static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
900                                           struct btrfs_root *root,
901                                           struct btrfs_path *path, int level)
902 {
903         struct extent_buffer *right = NULL;
904         struct extent_buffer *mid;
905         struct extent_buffer *left = NULL;
906         struct extent_buffer *parent = NULL;
907         int ret = 0;
908         int wret;
909         int pslot;
910         int orig_slot = path->slots[level];
911         u64 orig_ptr;
912
913         if (level == 0)
914                 return 1;
915
916         mid = path->nodes[level];
917         WARN_ON(btrfs_header_generation(mid) != trans->transid);
918         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
919
920         if (level < BTRFS_MAX_LEVEL - 1)
921                 parent = path->nodes[level + 1];
922         pslot = path->slots[level + 1];
923
924         if (!parent)
925                 return 1;
926
927         left = read_node_slot(root, parent, pslot - 1);
928
929         /* first, try to make some room in the middle buffer */
930         if (left) {
931                 u32 left_nr;
932                 left_nr = btrfs_header_nritems(left);
933                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
934                         wret = 1;
935                 } else {
936                         ret = btrfs_cow_block(trans, root, left, parent,
937                                               pslot - 1, &left);
938                         if (ret)
939                                 wret = 1;
940                         else {
941                                 wret = push_node_left(trans, root,
942                                                       left, mid);
943                         }
944                 }
945                 if (wret < 0)
946                         ret = wret;
947                 if (wret == 0) {
948                         struct btrfs_disk_key disk_key;
949                         orig_slot += left_nr;
950                         btrfs_node_key(mid, &disk_key, 0);
951                         btrfs_set_node_key(parent, &disk_key, pslot);
952                         btrfs_mark_buffer_dirty(parent);
953                         if (btrfs_header_nritems(left) > orig_slot) {
954                                 path->nodes[level] = left;
955                                 path->slots[level + 1] -= 1;
956                                 path->slots[level] = orig_slot;
957                                 free_extent_buffer(mid);
958                         } else {
959                                 orig_slot -=
960                                         btrfs_header_nritems(left);
961                                 path->slots[level] = orig_slot;
962                                 free_extent_buffer(left);
963                         }
964                         return 0;
965                 }
966                 free_extent_buffer(left);
967         }
968         right= read_node_slot(root, parent, pslot + 1);
969
970         /*
971          * then try to empty the right most buffer into the middle
972          */
973         if (right) {
974                 u32 right_nr;
975                 right_nr = btrfs_header_nritems(right);
976                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
977                         wret = 1;
978                 } else {
979                         ret = btrfs_cow_block(trans, root, right,
980                                               parent, pslot + 1,
981                                               &right);
982                         if (ret)
983                                 wret = 1;
984                         else {
985                                 wret = balance_node_right(trans, root,
986                                                           right, mid);
987                         }
988                 }
989                 if (wret < 0)
990                         ret = wret;
991                 if (wret == 0) {
992                         struct btrfs_disk_key disk_key;
993
994                         btrfs_node_key(right, &disk_key, 0);
995                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
996                         btrfs_mark_buffer_dirty(parent);
997
998                         if (btrfs_header_nritems(mid) <= orig_slot) {
999                                 path->nodes[level] = right;
1000                                 path->slots[level + 1] += 1;
1001                                 path->slots[level] = orig_slot -
1002                                         btrfs_header_nritems(mid);
1003                                 free_extent_buffer(mid);
1004                         } else {
1005                                 free_extent_buffer(right);
1006                         }
1007                         return 0;
1008                 }
1009                 free_extent_buffer(right);
1010         }
1011         return 1;
1012 }
1013
1014 /*
1015  * readahead one full node of leaves
1016  */
1017 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
1018                              int level, int slot, u64 objectid)
1019 {
1020         struct extent_buffer *node;
1021         struct btrfs_disk_key disk_key;
1022         u32 nritems;
1023         u64 search;
1024         u64 lowest_read;
1025         u64 highest_read;
1026         u64 nread = 0;
1027         int direction = path->reada;
1028         struct extent_buffer *eb;
1029         u32 nr;
1030         u32 blocksize;
1031         u32 nscan = 0;
1032
1033         if (level != 1)
1034                 return;
1035
1036         if (!path->nodes[level])
1037                 return;
1038
1039         node = path->nodes[level];
1040         search = btrfs_node_blockptr(node, slot);
1041         blocksize = btrfs_level_size(root, level - 1);
1042         eb = btrfs_find_tree_block(root, search, blocksize);
1043         if (eb) {
1044                 free_extent_buffer(eb);
1045                 return;
1046         }
1047
1048         highest_read = search;
1049         lowest_read = search;
1050
1051         nritems = btrfs_header_nritems(node);
1052         nr = slot;
1053         while(1) {
1054                 if (direction < 0) {
1055                         if (nr == 0)
1056                                 break;
1057                         nr--;
1058                 } else if (direction > 0) {
1059                         nr++;
1060                         if (nr >= nritems)
1061                                 break;
1062                 }
1063                 if (path->reada < 0 && objectid) {
1064                         btrfs_node_key(node, &disk_key, nr);
1065                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1066                                 break;
1067                 }
1068                 search = btrfs_node_blockptr(node, nr);
1069                 if ((search >= lowest_read && search <= highest_read) ||
1070                     (search < lowest_read && lowest_read - search <= 32768) ||
1071                     (search > highest_read && search - highest_read <= 32768)) {
1072                         readahead_tree_block(root, search, blocksize);
1073                         nread += blocksize;
1074                 }
1075                 nscan++;
1076                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
1077                         break;
1078                 if(nread > (1024 * 1024) || nscan > 128)
1079                         break;
1080
1081                 if (search < lowest_read)
1082                         lowest_read = search;
1083                 if (search > highest_read)
1084                         highest_read = search;
1085         }
1086 }
1087 /*
1088  * look for key in the tree.  path is filled in with nodes along the way
1089  * if key is found, we return zero and you can find the item in the leaf
1090  * level of the path (level 0)
1091  *
1092  * If the key isn't found, the path points to the slot where it should
1093  * be inserted, and 1 is returned.  If there are other errors during the
1094  * search a negative error number is returned.
1095  *
1096  * if ins_len > 0, nodes and leaves will be split as we walk down the
1097  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1098  * possible)
1099  */
1100 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1101                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1102                       ins_len, int cow)
1103 {
1104         struct extent_buffer *b;
1105         u64 bytenr;
1106         u64 ptr_gen;
1107         int slot;
1108         int ret;
1109         int level;
1110         int should_reada = p->reada;
1111         u8 lowest_level = 0;
1112
1113         lowest_level = p->lowest_level;
1114         WARN_ON(lowest_level && ins_len);
1115         WARN_ON(p->nodes[0] != NULL);
1116         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1117 again:
1118         b = root->node;
1119         extent_buffer_get(b);
1120         while (b) {
1121                 level = btrfs_header_level(b);
1122                 if (cow) {
1123                         int wret;
1124                         wret = btrfs_cow_block(trans, root, b,
1125                                                p->nodes[level + 1],
1126                                                p->slots[level + 1],
1127                                                &b);
1128                         if (wret) {
1129                                 free_extent_buffer(b);
1130                                 return wret;
1131                         }
1132                 }
1133                 BUG_ON(!cow && ins_len);
1134                 if (level != btrfs_header_level(b))
1135                         WARN_ON(1);
1136                 level = btrfs_header_level(b);
1137                 p->nodes[level] = b;
1138                 ret = check_block(root, p, level);
1139                 if (ret)
1140                         return -1;
1141                 ret = bin_search(b, key, level, &slot);
1142                 if (level != 0) {
1143                         if (ret && slot > 0)
1144                                 slot -= 1;
1145                         p->slots[level] = slot;
1146                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1147                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1148                                 int sret = split_node(trans, root, p, level);
1149                                 BUG_ON(sret > 0);
1150                                 if (sret)
1151                                         return sret;
1152                                 b = p->nodes[level];
1153                                 slot = p->slots[level];
1154                         } else if (ins_len < 0) {
1155                                 int sret = balance_level(trans, root, p,
1156                                                          level);
1157                                 if (sret)
1158                                         return sret;
1159                                 b = p->nodes[level];
1160                                 if (!b) {
1161                                         btrfs_release_path(NULL, p);
1162                                         goto again;
1163                                 }
1164                                 slot = p->slots[level];
1165                                 BUG_ON(btrfs_header_nritems(b) == 1);
1166                         }
1167                         /* this is only true while dropping a snapshot */
1168                         if (level == lowest_level)
1169                                 break;
1170                         bytenr = btrfs_node_blockptr(b, slot);
1171                         ptr_gen = btrfs_node_ptr_generation(b, slot);
1172                         if (should_reada)
1173                                 reada_for_search(root, p, level, slot,
1174                                                  key->objectid);
1175                         b = read_tree_block(root, bytenr,
1176                                             btrfs_level_size(root, level - 1));
1177                         if (ptr_gen != btrfs_header_generation(b)) {
1178                                 printk("block %llu bad gen wanted %llu "
1179                                        "found %llu\n",
1180                                 (unsigned long long)b->start,
1181                                 (unsigned long long)ptr_gen,
1182                                 (unsigned long long)btrfs_header_generation(b));
1183                         }
1184                 } else {
1185                         p->slots[level] = slot;
1186                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1187                             sizeof(struct btrfs_item) + ins_len) {
1188                                 int sret = split_leaf(trans, root, key,
1189                                                       p, ins_len, ret == 0);
1190                                 BUG_ON(sret > 0);
1191                                 if (sret)
1192                                         return sret;
1193                         }
1194                         return ret;
1195                 }
1196         }
1197         return 1;
1198 }
1199
1200 /*
1201  * adjust the pointers going up the tree, starting at level
1202  * making sure the right key of each node is points to 'key'.
1203  * This is used after shifting pointers to the left, so it stops
1204  * fixing up pointers when a given leaf/node is not in slot 0 of the
1205  * higher levels
1206  *
1207  * If this fails to write a tree block, it returns -1, but continues
1208  * fixing up the blocks in ram so the tree is consistent.
1209  */
1210 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1211                           struct btrfs_root *root, struct btrfs_path *path,
1212                           struct btrfs_disk_key *key, int level)
1213 {
1214         int i;
1215         int ret = 0;
1216         struct extent_buffer *t;
1217
1218         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1219                 int tslot = path->slots[i];
1220                 if (!path->nodes[i])
1221                         break;
1222                 t = path->nodes[i];
1223                 btrfs_set_node_key(t, key, tslot);
1224                 btrfs_mark_buffer_dirty(path->nodes[i]);
1225                 if (tslot != 0)
1226                         break;
1227         }
1228         return ret;
1229 }
1230
1231 /*
1232  * try to push data from one node into the next node left in the
1233  * tree.
1234  *
1235  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1236  * error, and > 0 if there was no room in the left hand block.
1237  */
1238 static int push_node_left(struct btrfs_trans_handle *trans,
1239                           struct btrfs_root *root, struct extent_buffer *dst,
1240                           struct extent_buffer *src)
1241 {
1242         int push_items = 0;
1243         int src_nritems;
1244         int dst_nritems;
1245         int ret = 0;
1246
1247         src_nritems = btrfs_header_nritems(src);
1248         dst_nritems = btrfs_header_nritems(dst);
1249         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1250         WARN_ON(btrfs_header_generation(src) != trans->transid);
1251         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1252
1253         if (push_items <= 0) {
1254                 return 1;
1255         }
1256
1257         if (src_nritems < push_items)
1258                 push_items = src_nritems;
1259
1260         copy_extent_buffer(dst, src,
1261                            btrfs_node_key_ptr_offset(dst_nritems),
1262                            btrfs_node_key_ptr_offset(0),
1263                            push_items * sizeof(struct btrfs_key_ptr));
1264
1265         if (push_items < src_nritems) {
1266                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1267                                       btrfs_node_key_ptr_offset(push_items),
1268                                       (src_nritems - push_items) *
1269                                       sizeof(struct btrfs_key_ptr));
1270         }
1271         btrfs_set_header_nritems(src, src_nritems - push_items);
1272         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1273         btrfs_mark_buffer_dirty(src);
1274         btrfs_mark_buffer_dirty(dst);
1275         return ret;
1276 }
1277
1278 /*
1279  * try to push data from one node into the next node right in the
1280  * tree.
1281  *
1282  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1283  * error, and > 0 if there was no room in the right hand block.
1284  *
1285  * this will  only push up to 1/2 the contents of the left node over
1286  */
1287 static int balance_node_right(struct btrfs_trans_handle *trans,
1288                               struct btrfs_root *root,
1289                               struct extent_buffer *dst,
1290                               struct extent_buffer *src)
1291 {
1292         int push_items = 0;
1293         int max_push;
1294         int src_nritems;
1295         int dst_nritems;
1296         int ret = 0;
1297
1298         WARN_ON(btrfs_header_generation(src) != trans->transid);
1299         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1300
1301         src_nritems = btrfs_header_nritems(src);
1302         dst_nritems = btrfs_header_nritems(dst);
1303         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1304         if (push_items <= 0)
1305                 return 1;
1306
1307         max_push = src_nritems / 2 + 1;
1308         /* don't try to empty the node */
1309         if (max_push >= src_nritems)
1310                 return 1;
1311
1312         if (max_push < push_items)
1313                 push_items = max_push;
1314
1315         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1316                                       btrfs_node_key_ptr_offset(0),
1317                                       (dst_nritems) *
1318                                       sizeof(struct btrfs_key_ptr));
1319
1320         copy_extent_buffer(dst, src,
1321                            btrfs_node_key_ptr_offset(0),
1322                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1323                            push_items * sizeof(struct btrfs_key_ptr));
1324
1325         btrfs_set_header_nritems(src, src_nritems - push_items);
1326         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1327
1328         btrfs_mark_buffer_dirty(src);
1329         btrfs_mark_buffer_dirty(dst);
1330         return ret;
1331 }
1332
1333 /*
1334  * helper function to insert a new root level in the tree.
1335  * A new node is allocated, and a single item is inserted to
1336  * point to the existing root
1337  *
1338  * returns zero on success or < 0 on failure.
1339  */
1340 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1341                            struct btrfs_root *root,
1342                            struct btrfs_path *path, int level)
1343 {
1344         u64 root_gen;
1345         u64 lower_gen;
1346         struct extent_buffer *lower;
1347         struct extent_buffer *c;
1348         struct btrfs_disk_key lower_key;
1349
1350         BUG_ON(path->nodes[level]);
1351         BUG_ON(path->nodes[level-1] != root->node);
1352
1353         if (root->ref_cows)
1354                 root_gen = trans->transid;
1355         else
1356                 root_gen = 0;
1357
1358         lower = path->nodes[level-1];
1359         if (level == 1)
1360                 btrfs_item_key(lower, &lower_key, 0);
1361         else
1362                 btrfs_node_key(lower, &lower_key, 0);
1363
1364         c = __btrfs_alloc_free_block(trans, root, root->nodesize,
1365                                    root->root_key.objectid,
1366                                    root_gen, lower_key.objectid, level,
1367                                    root->node->start, 0);
1368         if (IS_ERR(c))
1369                 return PTR_ERR(c);
1370         memset_extent_buffer(c, 0, 0, root->nodesize);
1371         btrfs_set_header_nritems(c, 1);
1372         btrfs_set_header_level(c, level);
1373         btrfs_set_header_bytenr(c, c->start);
1374         btrfs_set_header_generation(c, trans->transid);
1375         btrfs_set_header_owner(c, root->root_key.objectid);
1376
1377         write_extent_buffer(c, root->fs_info->fsid,
1378                             (unsigned long)btrfs_header_fsid(c),
1379                             BTRFS_FSID_SIZE);
1380         btrfs_set_node_key(c, &lower_key, 0);
1381         btrfs_set_node_blockptr(c, 0, lower->start);
1382         lower_gen = btrfs_header_generation(lower);
1383         WARN_ON(lower_gen == 0);
1384
1385         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1386
1387         btrfs_mark_buffer_dirty(c);
1388
1389         /* the super has an extra ref to root->node */
1390         free_extent_buffer(root->node);
1391         root->node = c;
1392         add_root_to_dirty_list(root);
1393         extent_buffer_get(c);
1394         path->nodes[level] = c;
1395         path->slots[level] = 0;
1396
1397         if (root->ref_cows && lower_gen != trans->transid) {
1398                 struct btrfs_path *back_path = btrfs_alloc_path();
1399                 int ret;
1400                 ret = btrfs_insert_extent_backref(trans,
1401                                                   root->fs_info->extent_root,
1402                                                   path, lower->start,
1403                                                   root->root_key.objectid,
1404                                                   trans->transid, 0, 0);
1405                 BUG_ON(ret);
1406                 btrfs_free_path(back_path);
1407         }
1408         return 0;
1409 }
1410
1411 /*
1412  * worker function to insert a single pointer in a node.
1413  * the node should have enough room for the pointer already
1414  *
1415  * slot and level indicate where you want the key to go, and
1416  * blocknr is the block the key points to.
1417  *
1418  * returns zero on success and < 0 on any error
1419  */
1420 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1421                       *root, struct btrfs_path *path, struct btrfs_disk_key
1422                       *key, u64 bytenr, int slot, int level)
1423 {
1424         struct extent_buffer *lower;
1425         int nritems;
1426
1427         BUG_ON(!path->nodes[level]);
1428         lower = path->nodes[level];
1429         nritems = btrfs_header_nritems(lower);
1430         if (slot > nritems)
1431                 BUG();
1432         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1433                 BUG();
1434         if (slot != nritems) {
1435                 memmove_extent_buffer(lower,
1436                               btrfs_node_key_ptr_offset(slot + 1),
1437                               btrfs_node_key_ptr_offset(slot),
1438                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1439         }
1440         btrfs_set_node_key(lower, key, slot);
1441         btrfs_set_node_blockptr(lower, slot, bytenr);
1442         WARN_ON(trans->transid == 0);
1443         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1444         btrfs_set_header_nritems(lower, nritems + 1);
1445         btrfs_mark_buffer_dirty(lower);
1446         return 0;
1447 }
1448
1449 /*
1450  * split the node at the specified level in path in two.
1451  * The path is corrected to point to the appropriate node after the split
1452  *
1453  * Before splitting this tries to make some room in the node by pushing
1454  * left and right, if either one works, it returns right away.
1455  *
1456  * returns 0 on success and < 0 on failure
1457  */
1458 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1459                       *root, struct btrfs_path *path, int level)
1460 {
1461         u64 root_gen;
1462         struct extent_buffer *c;
1463         struct extent_buffer *split;
1464         struct btrfs_disk_key disk_key;
1465         int mid;
1466         int ret;
1467         int wret;
1468         u32 c_nritems;
1469
1470         c = path->nodes[level];
1471         WARN_ON(btrfs_header_generation(c) != trans->transid);
1472         if (c == root->node) {
1473                 /* trying to split the root, lets make a new one */
1474                 ret = insert_new_root(trans, root, path, level + 1);
1475                 if (ret)
1476                         return ret;
1477         } else {
1478                 ret = push_nodes_for_insert(trans, root, path, level);
1479                 c = path->nodes[level];
1480                 if (!ret && btrfs_header_nritems(c) <
1481                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1482                         return 0;
1483                 if (ret < 0)
1484                         return ret;
1485         }
1486
1487         c_nritems = btrfs_header_nritems(c);
1488         if (root->ref_cows)
1489                 root_gen = trans->transid;
1490         else
1491                 root_gen = 0;
1492
1493         btrfs_node_key(c, &disk_key, 0);
1494         split = __btrfs_alloc_free_block(trans, root, root->nodesize,
1495                                          root->root_key.objectid,
1496                                          root_gen,
1497                                          btrfs_disk_key_objectid(&disk_key),
1498                                          level, c->start, 0);
1499         if (IS_ERR(split))
1500                 return PTR_ERR(split);
1501
1502         btrfs_set_header_flags(split, btrfs_header_flags(c));
1503         btrfs_set_header_level(split, btrfs_header_level(c));
1504         btrfs_set_header_bytenr(split, split->start);
1505         btrfs_set_header_generation(split, trans->transid);
1506         btrfs_set_header_owner(split, root->root_key.objectid);
1507         btrfs_set_header_flags(split, 0);
1508         write_extent_buffer(split, root->fs_info->fsid,
1509                             (unsigned long)btrfs_header_fsid(split),
1510                             BTRFS_FSID_SIZE);
1511
1512         mid = (c_nritems + 1) / 2;
1513
1514         copy_extent_buffer(split, c,
1515                            btrfs_node_key_ptr_offset(0),
1516                            btrfs_node_key_ptr_offset(mid),
1517                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1518         btrfs_set_header_nritems(split, c_nritems - mid);
1519         btrfs_set_header_nritems(c, mid);
1520         ret = 0;
1521
1522         btrfs_mark_buffer_dirty(c);
1523         btrfs_mark_buffer_dirty(split);
1524
1525         btrfs_node_key(split, &disk_key, 0);
1526         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1527                           path->slots[level + 1] + 1,
1528                           level + 1);
1529         if (wret)
1530                 ret = wret;
1531
1532         if (path->slots[level] >= mid) {
1533                 path->slots[level] -= mid;
1534                 free_extent_buffer(c);
1535                 path->nodes[level] = split;
1536                 path->slots[level + 1] += 1;
1537         } else {
1538                 free_extent_buffer(split);
1539         }
1540         return ret;
1541 }
1542
1543 /*
1544  * how many bytes are required to store the items in a leaf.  start
1545  * and nr indicate which items in the leaf to check.  This totals up the
1546  * space used both by the item structs and the item data
1547  */
1548 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1549 {
1550         int data_len;
1551         int nritems = btrfs_header_nritems(l);
1552         int end = min(nritems, start + nr) - 1;
1553
1554         if (!nr)
1555                 return 0;
1556         data_len = btrfs_item_end_nr(l, start);
1557         data_len = data_len - btrfs_item_offset_nr(l, end);
1558         data_len += sizeof(struct btrfs_item) * nr;
1559         WARN_ON(data_len < 0);
1560         return data_len;
1561 }
1562
/*
 * Room left in a leaf for new items and their data: the size of the
 * leaf data area minus everything the existing items already consume.
 *
 * A negative result is reported with a printk and still returned to
 * the caller.
 */
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
{
	int nritems = btrfs_header_nritems(leaf);
	int free_bytes;

	free_bytes = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
	if (free_bytes >= 0)
		return free_bytes;

	/* the items claim more space than the leaf has: log the details */
	printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
	       free_bytes, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
	       leaf_space_used(leaf, 0, nritems), nritems);
	return free_bytes;
}
1580
1581 /*
1582  * push some data in the path leaf to the right, trying to free up at
1583  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1584  *
1585  * returns 1 if the push failed because the other node didn't have enough
1586  * room, 0 if everything worked out and < 0 if there were major errors.
1587  */
1588 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1589                            *root, struct btrfs_path *path, int data_size,
1590                            int empty)
1591 {
1592         struct extent_buffer *left = path->nodes[0];
1593         struct extent_buffer *right;
1594         struct extent_buffer *upper;
1595         struct btrfs_disk_key disk_key;
1596         int slot;
1597         u32 i;
1598         int free_space;
1599         int push_space = 0;
1600         int push_items = 0;
1601         struct btrfs_item *item;
1602         u32 left_nritems;
1603         u32 nr;
1604         u32 right_nritems;
1605         u32 data_end;
1606         u32 this_item_size;
1607         int ret;
1608
1609         slot = path->slots[1];
1610         if (!path->nodes[1]) {
1611                 return 1;
1612         }
1613         upper = path->nodes[1];
1614         if (slot >= btrfs_header_nritems(upper) - 1)
1615                 return 1;
1616
1617         right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
1618                                 root->leafsize);
1619         free_space = btrfs_leaf_free_space(root, right);
1620         if (free_space < data_size + sizeof(struct btrfs_item)) {
1621                 free_extent_buffer(right);
1622                 return 1;
1623         }
1624
1625         /* cow and double check */
1626         ret = btrfs_cow_block(trans, root, right, upper,
1627                               slot + 1, &right);
1628         if (ret) {
1629                 free_extent_buffer(right);
1630                 return 1;
1631         }
1632         free_space = btrfs_leaf_free_space(root, right);
1633         if (free_space < data_size + sizeof(struct btrfs_item)) {
1634                 free_extent_buffer(right);
1635                 return 1;
1636         }
1637
1638         left_nritems = btrfs_header_nritems(left);
1639         if (left_nritems == 0) {
1640                 free_extent_buffer(right);
1641                 return 1;
1642         }
1643
1644         if (empty)
1645                 nr = 0;
1646         else
1647                 nr = 1;
1648
1649         i = left_nritems - 1;
1650         while (i >= nr) {
1651                 item = btrfs_item_nr(left, i);
1652
1653                 if (path->slots[0] == i)
1654                         push_space += data_size + sizeof(*item);
1655
1656                 if (!left->map_token) {
1657                         map_extent_buffer(left, (unsigned long)item,
1658                                         sizeof(struct btrfs_item),
1659                                         &left->map_token, &left->kaddr,
1660                                         &left->map_start, &left->map_len,
1661                                         KM_USER1);
1662                 }
1663
1664                 this_item_size = btrfs_item_size(left, item);
1665                 if (this_item_size + sizeof(*item) + push_space > free_space)
1666                         break;
1667                 push_items++;
1668                 push_space += this_item_size + sizeof(*item);
1669                 if (i == 0)
1670                         break;
1671                 i--;
1672         }
1673         if (left->map_token) {
1674                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1675                 left->map_token = NULL;
1676         }
1677
1678         if (push_items == 0) {
1679                 free_extent_buffer(right);
1680                 return 1;
1681         }
1682
1683         if (!empty && push_items == left_nritems)
1684                 WARN_ON(1);
1685
1686         /* push left to right */
1687         right_nritems = btrfs_header_nritems(right);
1688
1689         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1690         push_space -= leaf_data_end(root, left);
1691
1692         /* make room in the right data area */
1693         data_end = leaf_data_end(root, right);
1694         memmove_extent_buffer(right,
1695                               btrfs_leaf_data(right) + data_end - push_space,
1696                               btrfs_leaf_data(right) + data_end,
1697                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1698
1699         /* copy from the left data area */
1700         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1701                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1702                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1703                      push_space);
1704
1705         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1706                               btrfs_item_nr_offset(0),
1707                               right_nritems * sizeof(struct btrfs_item));
1708
1709         /* copy the items from left to right */
1710         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1711                    btrfs_item_nr_offset(left_nritems - push_items),
1712                    push_items * sizeof(struct btrfs_item));
1713
1714         /* update the item pointers */
1715         right_nritems += push_items;
1716         btrfs_set_header_nritems(right, right_nritems);
1717         push_space = BTRFS_LEAF_DATA_SIZE(root);
1718         for (i = 0; i < right_nritems; i++) {
1719                 item = btrfs_item_nr(right, i);
1720                 if (!right->map_token) {
1721                         map_extent_buffer(right, (unsigned long)item,
1722                                         sizeof(struct btrfs_item),
1723                                         &right->map_token, &right->kaddr,
1724                                         &right->map_start, &right->map_len,
1725                                         KM_USER1);
1726                 }
1727                 push_space -= btrfs_item_size(right, item);
1728                 btrfs_set_item_offset(right, item, push_space);
1729         }
1730
1731         if (right->map_token) {
1732                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1733                 right->map_token = NULL;
1734         }
1735         left_nritems -= push_items;
1736         btrfs_set_header_nritems(left, left_nritems);
1737
1738         if (left_nritems)
1739                 btrfs_mark_buffer_dirty(left);
1740         btrfs_mark_buffer_dirty(right);
1741
1742         btrfs_item_key(right, &disk_key, 0);
1743         btrfs_set_node_key(upper, &disk_key, slot + 1);
1744         btrfs_mark_buffer_dirty(upper);
1745
1746         /* then fixup the leaf pointer in the path */
1747         if (path->slots[0] >= left_nritems) {
1748                 path->slots[0] -= left_nritems;
1749                 free_extent_buffer(path->nodes[0]);
1750                 path->nodes[0] = right;
1751                 path->slots[1] += 1;
1752         } else {
1753                 free_extent_buffer(right);
1754         }
1755         return 0;
1756 }
1757 /*
1758  * push some data in the path leaf to the left, trying to free up at
1759  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1760  */
1761 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1762                           *root, struct btrfs_path *path, int data_size,
1763                           int empty)
1764 {
1765         struct btrfs_disk_key disk_key;
1766         struct extent_buffer *right = path->nodes[0];
1767         struct extent_buffer *left;
1768         int slot;
1769         int i;
1770         int free_space;
1771         int push_space = 0;
1772         int push_items = 0;
1773         struct btrfs_item *item;
1774         u32 old_left_nritems;
1775         u32 right_nritems;
1776         u32 nr;
1777         int ret = 0;
1778         int wret;
1779         u32 this_item_size;
1780         u32 old_left_item_size;
1781
1782         slot = path->slots[1];
1783         if (slot == 0)
1784                 return 1;
1785         if (!path->nodes[1])
1786                 return 1;
1787
1788         right_nritems = btrfs_header_nritems(right);
1789         if (right_nritems == 0) {
1790                 return 1;
1791         }
1792
1793         left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1794                                slot - 1), root->leafsize);
1795         free_space = btrfs_leaf_free_space(root, left);
1796         if (free_space < data_size + sizeof(struct btrfs_item)) {
1797                 free_extent_buffer(left);
1798                 return 1;
1799         }
1800
1801         /* cow and double check */
1802         ret = btrfs_cow_block(trans, root, left,
1803                               path->nodes[1], slot - 1, &left);
1804         if (ret) {
1805                 /* we hit -ENOSPC, but it isn't fatal here */
1806                 free_extent_buffer(left);
1807                 return 1;
1808         }
1809
1810         free_space = btrfs_leaf_free_space(root, left);
1811         if (free_space < data_size + sizeof(struct btrfs_item)) {
1812                 free_extent_buffer(left);
1813                 return 1;
1814         }
1815
1816         if (empty)
1817                 nr = right_nritems;
1818         else
1819                 nr = right_nritems - 1;
1820
1821         for (i = 0; i < nr; i++) {
1822                 item = btrfs_item_nr(right, i);
1823                 if (!right->map_token) {
1824                         map_extent_buffer(right, (unsigned long)item,
1825                                         sizeof(struct btrfs_item),
1826                                         &right->map_token, &right->kaddr,
1827                                         &right->map_start, &right->map_len,
1828                                         KM_USER1);
1829                 }
1830
1831                 if (path->slots[0] == i)
1832                         push_space += data_size + sizeof(*item);
1833
1834                 this_item_size = btrfs_item_size(right, item);
1835                 if (this_item_size + sizeof(*item) + push_space > free_space)
1836                         break;
1837
1838                 push_items++;
1839                 push_space += this_item_size + sizeof(*item);
1840         }
1841
1842         if (right->map_token) {
1843                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1844                 right->map_token = NULL;
1845         }
1846
1847         if (push_items == 0) {
1848                 free_extent_buffer(left);
1849                 return 1;
1850         }
1851         if (!empty && push_items == btrfs_header_nritems(right))
1852                 WARN_ON(1);
1853
1854         /* push data from right to left */
1855         copy_extent_buffer(left, right,
1856                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1857                            btrfs_item_nr_offset(0),
1858                            push_items * sizeof(struct btrfs_item));
1859
1860         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1861                      btrfs_item_offset_nr(right, push_items -1);
1862
1863         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1864                      leaf_data_end(root, left) - push_space,
1865                      btrfs_leaf_data(right) +
1866                      btrfs_item_offset_nr(right, push_items - 1),
1867                      push_space);
1868         old_left_nritems = btrfs_header_nritems(left);
1869         BUG_ON(old_left_nritems < 0);
1870
1871         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1872         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1873                 u32 ioff;
1874
1875                 item = btrfs_item_nr(left, i);
1876                 if (!left->map_token) {
1877                         map_extent_buffer(left, (unsigned long)item,
1878                                         sizeof(struct btrfs_item),
1879                                         &left->map_token, &left->kaddr,
1880                                         &left->map_start, &left->map_len,
1881                                         KM_USER1);
1882                 }
1883
1884                 ioff = btrfs_item_offset(left, item);
1885                 btrfs_set_item_offset(left, item,
1886                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1887         }
1888         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1889         if (left->map_token) {
1890                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1891                 left->map_token = NULL;
1892         }
1893
1894         /* fixup right node */
1895         if (push_items > right_nritems) {
1896                 printk("push items %d nr %u\n", push_items, right_nritems);
1897                 WARN_ON(1);
1898         }
1899
1900         if (push_items < right_nritems) {
1901                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1902                                                   leaf_data_end(root, right);
1903                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1904                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1905                                       btrfs_leaf_data(right) +
1906                                       leaf_data_end(root, right), push_space);
1907
1908                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1909                               btrfs_item_nr_offset(push_items),
1910                              (btrfs_header_nritems(right) - push_items) *
1911                              sizeof(struct btrfs_item));
1912         }
1913         right_nritems -= push_items;
1914         btrfs_set_header_nritems(right, right_nritems);
1915         push_space = BTRFS_LEAF_DATA_SIZE(root);
1916         for (i = 0; i < right_nritems; i++) {
1917                 item = btrfs_item_nr(right, i);
1918
1919                 if (!right->map_token) {
1920                         map_extent_buffer(right, (unsigned long)item,
1921                                         sizeof(struct btrfs_item),
1922                                         &right->map_token, &right->kaddr,
1923                                         &right->map_start, &right->map_len,
1924                                         KM_USER1);
1925                 }
1926
1927                 push_space = push_space - btrfs_item_size(right, item);
1928                 btrfs_set_item_offset(right, item, push_space);
1929         }
1930         if (right->map_token) {
1931                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1932                 right->map_token = NULL;
1933         }
1934
1935         btrfs_mark_buffer_dirty(left);
1936         if (right_nritems)
1937                 btrfs_mark_buffer_dirty(right);
1938
1939         btrfs_item_key(right, &disk_key, 0);
1940         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1941         if (wret)
1942                 ret = wret;
1943
1944         /* then fixup the leaf pointer in the path */
1945         if (path->slots[0] < push_items) {
1946                 path->slots[0] += old_left_nritems;
1947                 free_extent_buffer(path->nodes[0]);
1948                 path->nodes[0] = left;
1949                 path->slots[1] -= 1;
1950         } else {
1951                 free_extent_buffer(left);
1952                 path->slots[0] -= push_items;
1953         }
1954         BUG_ON(path->slots[0] < 0);
1955         return ret;
1956 }
1957
1958 /*
1959  * split the path's leaf in two, making sure there is at least data_size
1960  * available for the resulting leaf level of the path.
1961  *
1962  * returns 0 if all went well and < 0 on failure.
1963  */
1964 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1965                       *root, struct btrfs_key *ins_key,
1966                       struct btrfs_path *path, int data_size, int extend)
1967 {
1968         u64 root_gen;
1969         struct extent_buffer *l;
1970         u32 nritems;
1971         int mid;
1972         int slot;
1973         struct extent_buffer *right;
1974         int space_needed = data_size + sizeof(struct btrfs_item);
1975         int data_copy_size;
1976         int rt_data_off;
1977         int i;
1978         int ret = 0;
1979         int wret;
1980         int double_split;
1981         int num_doubles = 0;
1982         struct btrfs_disk_key disk_key;
1983
1984         if (extend)
1985                 space_needed = data_size;
1986
1987         if (root->ref_cows)
1988                 root_gen = trans->transid;
1989         else
1990                 root_gen = 0;
1991
1992         /* first try to make some room by pushing left and right */
1993         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1994                 wret = push_leaf_right(trans, root, path, data_size, 0);
1995                 if (wret < 0) {
1996                         return wret;
1997                 }
1998                 if (wret) {
1999                         wret = push_leaf_left(trans, root, path, data_size, 0);
2000                         if (wret < 0)
2001                                 return wret;
2002                 }
2003                 l = path->nodes[0];
2004
2005                 /* did the pushes work? */
2006                 if (btrfs_leaf_free_space(root, l) >= space_needed)
2007                         return 0;
2008         }
2009
2010         if (!path->nodes[1]) {
2011                 ret = insert_new_root(trans, root, path, 1);
2012                 if (ret)
2013                         return ret;
2014         }
2015 again:
2016         double_split = 0;
2017         l = path->nodes[0];
2018         slot = path->slots[0];
2019         nritems = btrfs_header_nritems(l);
2020         mid = (nritems + 1)/ 2;
2021
2022         btrfs_item_key(l, &disk_key, 0);
2023
2024         right = __btrfs_alloc_free_block(trans, root, root->leafsize,
2025                                          root->root_key.objectid,
2026                                          root_gen, disk_key.objectid, 0,
2027                                          l->start, 0);
2028         if (IS_ERR(right))
2029                 return PTR_ERR(right);
2030
2031         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2032         btrfs_set_header_bytenr(right, right->start);
2033         btrfs_set_header_generation(right, trans->transid);
2034         btrfs_set_header_owner(right, root->root_key.objectid);
2035         btrfs_set_header_level(right, 0);
2036         write_extent_buffer(right, root->fs_info->fsid,
2037                             (unsigned long)btrfs_header_fsid(right),
2038                             BTRFS_FSID_SIZE);
2039         if (mid <= slot) {
2040                 if (nritems == 1 ||
2041                     leaf_space_used(l, mid, nritems - mid) + space_needed >
2042                         BTRFS_LEAF_DATA_SIZE(root)) {
2043                         if (slot >= nritems) {
2044                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2045                                 btrfs_set_header_nritems(right, 0);
2046                                 wret = insert_ptr(trans, root, path,
2047                                                   &disk_key, right->start,
2048                                                   path->slots[1] + 1, 1);
2049                                 if (wret)
2050                                         ret = wret;
2051                                 free_extent_buffer(path->nodes[0]);
2052                                 path->nodes[0] = right;
2053                                 path->slots[0] = 0;
2054                                 path->slots[1] += 1;
2055                                 return ret;
2056                         }
2057                         mid = slot;
2058                         if (mid != nritems &&
2059                             leaf_space_used(l, mid, nritems - mid) +
2060                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2061                                 double_split = 1;
2062                         }
2063                 }
2064         } else {
2065                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
2066                         BTRFS_LEAF_DATA_SIZE(root)) {
2067                         if (!extend && slot == 0) {
2068                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2069                                 btrfs_set_header_nritems(right, 0);
2070                                 wret = insert_ptr(trans, root, path,
2071                                                   &disk_key,
2072                                                   right->start,
2073                                                   path->slots[1], 1);
2074                                 if (wret)
2075                                         ret = wret;
2076                                 free_extent_buffer(path->nodes[0]);
2077                                 path->nodes[0] = right;
2078                                 path->slots[0] = 0;
2079                                 if (path->slots[1] == 0) {
2080                                         wret = fixup_low_keys(trans, root,
2081                                                    path, &disk_key, 1);
2082                                         if (wret)
2083                                                 ret = wret;
2084                                 }
2085                                 return ret;
2086                         } else if (extend && slot == 0) {
2087                                 mid = 1;
2088                         } else {
2089                                 mid = slot;
2090                                 if (mid != nritems &&
2091                                     leaf_space_used(l, mid, nritems - mid) +
2092                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2093                                         double_split = 1;
2094                                 }
2095                         }
2096                 }
2097         }
2098         nritems = nritems - mid;
2099         btrfs_set_header_nritems(right, nritems);
2100         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2101
2102         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2103                            btrfs_item_nr_offset(mid),
2104                            nritems * sizeof(struct btrfs_item));
2105
2106         copy_extent_buffer(right, l,
2107                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2108                      data_copy_size, btrfs_leaf_data(l) +
2109                      leaf_data_end(root, l), data_copy_size);
2110
2111         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2112                       btrfs_item_end_nr(l, mid);
2113
2114         for (i = 0; i < nritems; i++) {
2115                 struct btrfs_item *item = btrfs_item_nr(right, i);
2116                 u32 ioff;
2117
2118                 if (!right->map_token) {
2119                         map_extent_buffer(right, (unsigned long)item,
2120                                         sizeof(struct btrfs_item),
2121                                         &right->map_token, &right->kaddr,
2122                                         &right->map_start, &right->map_len,
2123                                         KM_USER1);
2124                 }
2125
2126                 ioff = btrfs_item_offset(right, item);
2127                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2128         }
2129
2130         if (right->map_token) {
2131                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2132                 right->map_token = NULL;
2133         }
2134
2135         btrfs_set_header_nritems(l, mid);
2136         ret = 0;
2137         btrfs_item_key(right, &disk_key, 0);
2138         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2139                           path->slots[1] + 1, 1);
2140         if (wret)
2141                 ret = wret;
2142
2143         btrfs_mark_buffer_dirty(right);
2144         btrfs_mark_buffer_dirty(l);
2145         BUG_ON(path->slots[0] != slot);
2146
2147         if (mid <= slot) {
2148                 free_extent_buffer(path->nodes[0]);
2149                 path->nodes[0] = right;
2150                 path->slots[0] -= mid;
2151                 path->slots[1] += 1;
2152         } else
2153                 free_extent_buffer(right);
2154
2155         BUG_ON(path->slots[0] < 0);
2156
2157         if (double_split) {
2158                 BUG_ON(num_doubles != 0);
2159                 num_doubles++;
2160                 goto again;
2161         }
2162         return ret;
2163 }
2164
2165 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2166                         struct btrfs_root *root,
2167                         struct btrfs_path *path,
2168                         u32 new_size, int from_end)
2169 {
2170         int ret = 0;
2171         int slot;
2172         int slot_orig;
2173         struct extent_buffer *leaf;
2174         struct btrfs_item *item;
2175         u32 nritems;
2176         unsigned int data_end;
2177         unsigned int old_data_start;
2178         unsigned int old_size;
2179         unsigned int size_diff;
2180         int i;
2181
2182         slot_orig = path->slots[0];
2183         leaf = path->nodes[0];
2184         slot = path->slots[0];
2185
2186         old_size = btrfs_item_size_nr(leaf, slot);
2187         if (old_size == new_size)
2188                 return 0;
2189
2190         nritems = btrfs_header_nritems(leaf);
2191         data_end = leaf_data_end(root, leaf);
2192
2193         old_data_start = btrfs_item_offset_nr(leaf, slot);
2194
2195         size_diff = old_size - new_size;
2196
2197         BUG_ON(slot < 0);
2198         BUG_ON(slot >= nritems);
2199
2200         /*
2201          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2202          */
2203         /* first correct the data pointers */
2204         for (i = slot; i < nritems; i++) {
2205                 u32 ioff;
2206                 item = btrfs_item_nr(leaf, i);
2207
2208                 if (!leaf->map_token) {
2209                         map_extent_buffer(leaf, (unsigned long)item,
2210                                         sizeof(struct btrfs_item),
2211                                         &leaf->map_token, &leaf->kaddr,
2212                                         &leaf->map_start, &leaf->map_len,
2213                                         KM_USER1);
2214                 }
2215
2216                 ioff = btrfs_item_offset(leaf, item);
2217                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2218         }
2219
2220         if (leaf->map_token) {
2221                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2222                 leaf->map_token = NULL;
2223         }
2224
2225         /* shift the data */
2226         if (from_end) {
2227                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2228                               data_end + size_diff, btrfs_leaf_data(leaf) +
2229                               data_end, old_data_start + new_size - data_end);
2230         } else {
2231                 struct btrfs_disk_key disk_key;
2232                 u64 offset;
2233
2234                 btrfs_item_key(leaf, &disk_key, slot);
2235
2236                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2237                         unsigned long ptr;
2238                         struct btrfs_file_extent_item *fi;
2239
2240                         fi = btrfs_item_ptr(leaf, slot,
2241                                             struct btrfs_file_extent_item);
2242                         fi = (struct btrfs_file_extent_item *)(
2243                              (unsigned long)fi - size_diff);
2244
2245                         if (btrfs_file_extent_type(leaf, fi) ==
2246                             BTRFS_FILE_EXTENT_INLINE) {
2247                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2248                                 memmove_extent_buffer(leaf, ptr,
2249                                         (unsigned long)fi,
2250                                         offsetof(struct btrfs_file_extent_item,
2251                                                  disk_bytenr));
2252                         }
2253                 }
2254
2255                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2256                               data_end + size_diff, btrfs_leaf_data(leaf) +
2257                               data_end, old_data_start - data_end);
2258
2259                 offset = btrfs_disk_key_offset(&disk_key);
2260                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2261                 btrfs_set_item_key(leaf, &disk_key, slot);
2262                 if (slot == 0)
2263                         fixup_low_keys(trans, root, path, &disk_key, 1);
2264         }
2265
2266         item = btrfs_item_nr(leaf, slot);
2267         btrfs_set_item_size(leaf, item, new_size);
2268         btrfs_mark_buffer_dirty(leaf);
2269
2270         ret = 0;
2271         if (btrfs_leaf_free_space(root, leaf) < 0) {
2272                 btrfs_print_leaf(root, leaf);
2273                 BUG();
2274         }
2275         return ret;
2276 }
2277
2278 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2279                       struct btrfs_root *root, struct btrfs_path *path,
2280                       u32 data_size)
2281 {
2282         int ret = 0;
2283         int slot;
2284         int slot_orig;
2285         struct extent_buffer *leaf;
2286         struct btrfs_item *item;
2287         u32 nritems;
2288         unsigned int data_end;
2289         unsigned int old_data;
2290         unsigned int old_size;
2291         int i;
2292
2293         slot_orig = path->slots[0];
2294         leaf = path->nodes[0];
2295
2296         nritems = btrfs_header_nritems(leaf);
2297         data_end = leaf_data_end(root, leaf);
2298
2299         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2300                 btrfs_print_leaf(root, leaf);
2301                 BUG();
2302         }
2303         slot = path->slots[0];
2304         old_data = btrfs_item_end_nr(leaf, slot);
2305
2306         BUG_ON(slot < 0);
2307         if (slot >= nritems) {
2308                 btrfs_print_leaf(root, leaf);
2309                 printk("slot %d too large, nritems %d\n", slot, nritems);
2310                 BUG_ON(1);
2311         }
2312
2313         /*
2314          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2315          */
2316         /* first correct the data pointers */
2317         for (i = slot; i < nritems; i++) {
2318                 u32 ioff;
2319                 item = btrfs_item_nr(leaf, i);
2320
2321                 if (!leaf->map_token) {
2322                         map_extent_buffer(leaf, (unsigned long)item,
2323                                         sizeof(struct btrfs_item),
2324                                         &leaf->map_token, &leaf->kaddr,
2325                                         &leaf->map_start, &leaf->map_len,
2326                                         KM_USER1);
2327                 }
2328                 ioff = btrfs_item_offset(leaf, item);
2329                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2330         }
2331
2332         if (leaf->map_token) {
2333                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2334                 leaf->map_token = NULL;
2335         }
2336
2337         /* shift the data */
2338         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2339                       data_end - data_size, btrfs_leaf_data(leaf) +
2340                       data_end, old_data - data_end);
2341
2342         data_end = old_data;
2343         old_size = btrfs_item_size_nr(leaf, slot);
2344         item = btrfs_item_nr(leaf, slot);
2345         btrfs_set_item_size(leaf, item, old_size + data_size);
2346         btrfs_mark_buffer_dirty(leaf);
2347
2348         ret = 0;
2349         if (btrfs_leaf_free_space(root, leaf) < 0) {
2350                 btrfs_print_leaf(root, leaf);
2351                 BUG();
2352         }
2353         return ret;
2354 }
2355
2356 /*
2357  * Given a key and some data, insert an item into the tree.
2358  * This does all the path init required, making room in the tree if needed.
2359  */
2360 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2361                             struct btrfs_root *root,
2362                             struct btrfs_path *path,
2363                             struct btrfs_key *cpu_key, u32 *data_size,
2364                             int nr)
2365 {
2366         struct extent_buffer *leaf;
2367         struct btrfs_item *item;
2368         int ret = 0;
2369         int slot;
2370         int slot_orig;
2371         int i;
2372         u32 nritems;
2373         u32 total_size = 0;
2374         u32 total_data = 0;
2375         unsigned int data_end;
2376         struct btrfs_disk_key disk_key;
2377
2378         for (i = 0; i < nr; i++) {
2379                 total_data += data_size[i];
2380         }
2381
2382         /* create a root if there isn't one */
2383         if (!root->node)
2384                 BUG();
2385
2386         total_size = total_data + (nr - 1) * sizeof(struct btrfs_item);
2387         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2388         if (ret == 0) {
2389                 return -EEXIST;
2390         }
2391         if (ret < 0)
2392                 goto out;
2393
2394         slot_orig = path->slots[0];
2395         leaf = path->nodes[0];
2396
2397         nritems = btrfs_header_nritems(leaf);
2398         data_end = leaf_data_end(root, leaf);
2399
2400         if (btrfs_leaf_free_space(root, leaf) <
2401             sizeof(struct btrfs_item) + total_size) {
2402                 btrfs_print_leaf(root, leaf);
2403                 printk("not enough freespace need %u have %d\n",
2404                        total_size, btrfs_leaf_free_space(root, leaf));
2405                 BUG();
2406         }
2407
2408         slot = path->slots[0];
2409         BUG_ON(slot < 0);
2410
2411         if (slot != nritems) {
2412                 int i;
2413                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2414
2415                 if (old_data < data_end) {
2416                         btrfs_print_leaf(root, leaf);
2417                         printk("slot %d old_data %d data_end %d\n",
2418                                slot, old_data, data_end);
2419                         BUG_ON(1);
2420                 }
2421                 /*
2422                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2423                  */
2424                 /* first correct the data pointers */
2425                 WARN_ON(leaf->map_token);
2426                 for (i = slot; i < nritems; i++) {
2427                         u32 ioff;
2428
2429                         item = btrfs_item_nr(leaf, i);
2430                         if (!leaf->map_token) {
2431                                 map_extent_buffer(leaf, (unsigned long)item,
2432                                         sizeof(struct btrfs_item),
2433                                         &leaf->map_token, &leaf->kaddr,
2434                                         &leaf->map_start, &leaf->map_len,
2435                                         KM_USER1);
2436                         }
2437
2438                         ioff = btrfs_item_offset(leaf, item);
2439                         btrfs_set_item_offset(leaf, item, ioff - total_data);
2440                 }
2441                 if (leaf->map_token) {
2442                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2443                         leaf->map_token = NULL;
2444                 }
2445
2446                 /* shift the items */
2447                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
2448                               btrfs_item_nr_offset(slot),
2449                               (nritems - slot) * sizeof(struct btrfs_item));
2450
2451                 /* shift the data */
2452                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2453                               data_end - total_data, btrfs_leaf_data(leaf) +
2454                               data_end, old_data - data_end);
2455                 data_end = old_data;
2456         }
2457
2458         /* setup the item for the new data */
2459         for (i = 0; i < nr; i++) {
2460                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
2461                 btrfs_set_item_key(leaf, &disk_key, slot + i);
2462                 item = btrfs_item_nr(leaf, slot + i);
2463                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
2464                 data_end -= data_size[i];
2465                 btrfs_set_item_size(leaf, item, data_size[i]);
2466         }
2467         btrfs_set_header_nritems(leaf, nritems + nr);
2468         btrfs_mark_buffer_dirty(leaf);
2469
2470         ret = 0;
2471         if (slot == 0) {
2472                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2473                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2474         }
2475
2476         if (btrfs_leaf_free_space(root, leaf) < 0) {
2477                 btrfs_print_leaf(root, leaf);
2478                 BUG();
2479         }
2480
2481 out:
2482         return ret;
2483 }
2484
2485 /*
2486  * Given a key and some data, insert an item into the tree.
2487  * This does all the path init required, making room in the tree if needed.
2488  */
2489 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2490                       *root, struct btrfs_key *cpu_key, void *data, u32
2491                       data_size)
2492 {
2493         int ret = 0;
2494         struct btrfs_path *path;
2495         struct extent_buffer *leaf;
2496         unsigned long ptr;
2497
2498         path = btrfs_alloc_path();
2499         BUG_ON(!path);
2500         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2501         if (!ret) {
2502                 leaf = path->nodes[0];
2503                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2504                 write_extent_buffer(leaf, data, ptr, data_size);
2505                 btrfs_mark_buffer_dirty(leaf);
2506         }
2507         btrfs_free_path(path);
2508         return ret;
2509 }
2510
2511 /*
2512  * delete the pointer from a given node.
2513  *
2514  * If the delete empties a node, the node is removed from the tree,
2515  * continuing all the way the root if required.  The root is converted into
2516  * a leaf if all the nodes are emptied.
2517  */
2518 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2519                    struct btrfs_path *path, int level, int slot)
2520 {
2521         struct extent_buffer *parent = path->nodes[level];
2522         u32 nritems;
2523         int ret = 0;
2524         int wret;
2525
2526         nritems = btrfs_header_nritems(parent);
2527         if (slot != nritems -1) {
2528                 memmove_extent_buffer(parent,
2529                               btrfs_node_key_ptr_offset(slot),
2530                               btrfs_node_key_ptr_offset(slot + 1),
2531                               sizeof(struct btrfs_key_ptr) *
2532                               (nritems - slot - 1));
2533         }
2534         nritems--;
2535         btrfs_set_header_nritems(parent, nritems);
2536         if (nritems == 0 && parent == root->node) {
2537                 BUG_ON(btrfs_header_level(root->node) != 1);
2538                 /* just turn the root into a leaf and break */
2539                 btrfs_set_header_level(root->node, 0);
2540         } else if (slot == 0) {
2541                 struct btrfs_disk_key disk_key;
2542
2543                 btrfs_node_key(parent, &disk_key, 0);
2544                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2545                 if (wret)
2546                         ret = wret;
2547         }
2548         btrfs_mark_buffer_dirty(parent);
2549         return ret;
2550 }
2551
2552 /*
2553  * delete the item at the leaf level in path.  If that empties
2554  * the leaf, remove it from the tree
2555  */
2556 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2557                     struct btrfs_path *path, int slot, int nr)
2558 {
2559         struct extent_buffer *leaf;
2560         struct btrfs_item *item;
2561         int last_off;
2562         int dsize = 0;
2563         int ret = 0;
2564         int wret;
2565         int i;
2566         u32 nritems;
2567
2568         leaf = path->nodes[0];
2569         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
2570
2571         for (i = 0; i < nr; i++)
2572                 dsize += btrfs_item_size_nr(leaf, slot + i);
2573
2574         nritems = btrfs_header_nritems(leaf);
2575
2576         if (slot + nr != nritems) {
2577                 int i;
2578                 int data_end = leaf_data_end(root, leaf);
2579
2580                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2581                               data_end + dsize,
2582                               btrfs_leaf_data(leaf) + data_end,
2583                               last_off - data_end);
2584
2585                 for (i = slot + nr; i < nritems; i++) {
2586                         u32 ioff;
2587
2588                         item = btrfs_item_nr(leaf, i);
2589                         if (!leaf->map_token) {
2590                                 map_extent_buffer(leaf, (unsigned long)item,
2591                                         sizeof(struct btrfs_item),
2592                                         &leaf->map_token, &leaf->kaddr,
2593                                         &leaf->map_start, &leaf->map_len,
2594                                         KM_USER1);
2595                         }
2596                         ioff = btrfs_item_offset(leaf, item);
2597                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2598                 }
2599
2600                 if (leaf->map_token) {
2601                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2602                         leaf->map_token = NULL;
2603                 }
2604
2605                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2606                               btrfs_item_nr_offset(slot + nr),
2607                               sizeof(struct btrfs_item) *
2608                               (nritems - slot - nr));
2609         }
2610         btrfs_set_header_nritems(leaf, nritems - nr);
2611         nritems -= nr;
2612
2613         /* delete the leaf if we've emptied it */
2614         if (nritems == 0) {
2615                 if (leaf == root->node) {
2616                         btrfs_set_header_level(leaf, 0);
2617                 } else {
2618                         u64 root_gen = btrfs_header_generation(path->nodes[1]);
2619                         clean_tree_block(trans, root, leaf);
2620                         wait_on_tree_block_writeback(root, leaf);
2621                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2622                         if (wret)
2623                                 ret = wret;
2624                         wret = btrfs_free_extent(trans, root,
2625                                          leaf->start, leaf->len,
2626                                          btrfs_header_owner(path->nodes[1]),
2627                                          root_gen, 0, 0, 1);
2628                         if (wret)
2629                                 ret = wret;
2630                 }
2631         } else {
2632                 int used = leaf_space_used(leaf, 0, nritems);
2633                 if (slot == 0) {
2634                         struct btrfs_disk_key disk_key;
2635
2636                         btrfs_item_key(leaf, &disk_key, 0);
2637                         wret = fixup_low_keys(trans, root, path,
2638                                               &disk_key, 1);
2639                         if (wret)
2640                                 ret = wret;
2641                 }
2642
2643                 /* delete the leaf if it is mostly empty */
2644                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
2645                         /* push_leaf_left fixes the path.
2646                          * make sure the path still points to our leaf
2647                          * for possible call to del_ptr below
2648                          */
2649                         slot = path->slots[1];
2650                         extent_buffer_get(leaf);
2651
2652                         wret = push_leaf_left(trans, root, path, 1, 1);
2653                         if (wret < 0 && wret != -ENOSPC)
2654                                 ret = wret;
2655
2656                         if (path->nodes[0] == leaf &&
2657                             btrfs_header_nritems(leaf)) {
2658                                 wret = push_leaf_right(trans, root, path, 1, 1);
2659                                 if (wret < 0 && wret != -ENOSPC)
2660                                         ret = wret;
2661                         }
2662
2663                         if (btrfs_header_nritems(leaf) == 0) {
2664                                 u64 root_gen;
2665                                 u64 bytenr = leaf->start;
2666                                 u32 blocksize = leaf->len;
2667
2668                                 root_gen = btrfs_header_generation(
2669                                                            path->nodes[1]);
2670
2671                                 clean_tree_block(trans, root, leaf);
2672                                 wait_on_tree_block_writeback(root, leaf);
2673
2674                                 wret = del_ptr(trans, root, path, 1, slot);
2675                                 if (wret)
2676                                         ret = wret;
2677
2678                                 free_extent_buffer(leaf);
2679                                 wret = btrfs_free_extent(trans, root, bytenr,
2680                                              blocksize,
2681                                              btrfs_header_owner(path->nodes[1]),
2682                                              root_gen, 0, 0, 1);
2683                                 if (wret)
2684                                         ret = wret;
2685                         } else {
2686                                 btrfs_mark_buffer_dirty(leaf);
2687                                 free_extent_buffer(leaf);
2688                         }
2689                 } else {
2690                         btrfs_mark_buffer_dirty(leaf);
2691                 }
2692         }
2693         return ret;
2694 }
2695
2696 /*
2697  * walk up the tree as far as required to find the previous leaf.
2698  * returns 0 if it found something or 1 if there are no lesser leaves.
2699  * returns < 0 on io errors.
2700  */
2701 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2702 {
2703         u64 bytenr;
2704         int slot;
2705         int level = 1;
2706         struct extent_buffer *c;
2707         struct extent_buffer *next = NULL;
2708
2709         while(level < BTRFS_MAX_LEVEL) {
2710                 if (!path->nodes[level])
2711                         return 1;
2712
2713                 slot = path->slots[level];
2714                 c = path->nodes[level];
2715                 if (slot == 0) {
2716                         level++;
2717                         if (level == BTRFS_MAX_LEVEL)
2718                                 return 1;
2719                         continue;
2720                 }
2721                 slot--;
2722
2723                 bytenr = btrfs_node_blockptr(c, slot);
2724                 if (next)
2725                         free_extent_buffer(next);
2726
2727                 next = read_tree_block(root, bytenr,
2728                                        btrfs_level_size(root, level - 1));
2729                 break;
2730         }
2731         path->slots[level] = slot;
2732         while(1) {
2733                 level--;
2734                 c = path->nodes[level];
2735                 free_extent_buffer(c);
2736                 slot = btrfs_header_nritems(next);
2737                 if (slot != 0)
2738                         slot--;
2739                 path->nodes[level] = next;
2740                 path->slots[level] = slot;
2741                 if (!level)
2742                         break;
2743                 next = read_tree_block(root, btrfs_node_blockptr(next, slot),
2744                                        btrfs_level_size(root, level - 1));
2745         }
2746         return 0;
2747 }
2748
2749 /*
2750  * walk up the tree as far as required to find the next leaf.
2751  * returns 0 if it found something or 1 if there are no greater leaves.
2752  * returns < 0 on io errors.
2753  */
2754 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2755 {
2756         int slot;
2757         int level = 1;
2758         u64 bytenr;
2759         struct extent_buffer *c;
2760         struct extent_buffer *next = NULL;
2761
2762         while(level < BTRFS_MAX_LEVEL) {
2763                 if (!path->nodes[level])
2764                         return 1;
2765
2766                 slot = path->slots[level] + 1;
2767                 c = path->nodes[level];
2768                 if (slot >= btrfs_header_nritems(c)) {
2769                         level++;
2770                         if (level == BTRFS_MAX_LEVEL)
2771                                 return 1;
2772                         continue;
2773                 }
2774
2775                 bytenr = btrfs_node_blockptr(c, slot);
2776                 if (next)
2777                         free_extent_buffer(next);
2778
2779                 if (path->reada)
2780                         reada_for_search(root, path, level, slot, 0);
2781
2782                 next = read_tree_block(root, bytenr,
2783                                        btrfs_level_size(root, level -1));
2784                 break;
2785         }
2786         path->slots[level] = slot;
2787         while(1) {
2788                 level--;
2789                 c = path->nodes[level];
2790                 free_extent_buffer(c);
2791                 path->nodes[level] = next;
2792                 path->slots[level] = 0;
2793                 if (!level)
2794                         break;
2795                 if (path->reada)
2796                         reada_for_search(root, path, level, 0, 0);
2797                 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2798                                        btrfs_level_size(root, level - 1));
2799         }
2800         return 0;
2801 }
2802
2803 int btrfs_previous_item(struct btrfs_root *root,
2804                         struct btrfs_path *path, u64 min_objectid,
2805                         int type)
2806 {
2807         struct btrfs_key found_key;
2808         struct extent_buffer *leaf;
2809         int ret;
2810
2811         while(1) {
2812                 if (path->slots[0] == 0) {
2813                         ret = btrfs_prev_leaf(root, path);
2814                         if (ret != 0)
2815                                 return ret;
2816                 } else {
2817                         path->slots[0]--;
2818                 }
2819                 leaf = path->nodes[0];
2820                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2821                 if (found_key.type == type)
2822                         return 0;
2823         }
2824         return 1;
2825 }
2826