6dfa92e5ceed239a7e1928ad55dc3f31eb50fdf5
[sfrench/cifs-2.6.git] / fs / btrfs / relocation.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2009 Oracle.  All rights reserved.
4  */
5
6 #include <linux/sched.h>
7 #include <linux/pagemap.h>
8 #include <linux/writeback.h>
9 #include <linux/blkdev.h>
10 #include <linux/rbtree.h>
11 #include <linux/slab.h>
12 #include "ctree.h"
13 #include "disk-io.h"
14 #include "transaction.h"
15 #include "volumes.h"
16 #include "locking.h"
17 #include "btrfs_inode.h"
18 #include "async-thread.h"
19 #include "free-space-cache.h"
20 #include "inode-map.h"
21 #include "qgroup.h"
22 #include "print-tree.h"
23
/*
 * Common header shared by backref_node, mapping_node and tree_block so
 * all three can be indexed by bytenr in an rb-tree through the generic
 * tree_insert()/tree_search() helpers below.
 */
struct tree_entry {
        struct rb_node rb_node; /* link into the containing rb-tree */
        u64 bytenr;             /* logical block start, the sort key */
};
31
/*
 * present a tree block in the backref cache
 */
struct backref_node {
        struct rb_node rb_node;         /* link into backref_cache::rb_root */
        u64 bytenr;                     /* block start, the rb-tree key */

        /* new block start after the block was COWed; used by
         * update_backref_cache() to re-index the node */
        u64 new_bytenr;
        /* objectid of tree block owner, can be not uptodate */
        u64 owner;
        /* link to pending, changed or detached list */
        struct list_head list;
        /* list of upper level blocks reference this block */
        struct list_head upper;
        /* list of child blocks in the cache */
        struct list_head lower;
        /* NULL if this node is not tree root */
        struct btrfs_root *root;
        /* extent buffer got by COW the block */
        struct extent_buffer *eb;
        /* level of tree block */
        unsigned int level:8;
        /* is the block in non-reference counted tree */
        unsigned int cowonly:1;
        /* 1 if no child node in the cache */
        unsigned int lowest:1;
        /* is the extent buffer locked */
        unsigned int locked:1;
        /* has the block been processed */
        unsigned int processed:1;
        /* have backrefs of this block been checked */
        unsigned int checked:1;
        /*
         * 1 if corresponding block has been cowed but some upper
         * level block pointers may not point to the new location
         */
        unsigned int pending:1;
        /*
         * 1 if the backref node isn't connected to any other
         * backref node.
         */
        unsigned int detached:1;
};
75
/*
 * present a block pointer in the backref cache
 *
 * An edge connects a lower level block to the upper level block that
 * references it.  edge->list[LOWER] is linked into the lower node's
 * ->upper list; node[LOWER]/node[UPPER] point at the two ends.
 */
struct backref_edge {
        struct list_head list[2];
        struct backref_node *node[2];
};

/* indices into backref_edge's list[]/node[] arrays */
#define LOWER   0
#define UPPER   1
#define RELOCATION_RESERVED_NODES       256
87
struct backref_cache {
        /* red black tree of all backref nodes in the cache */
        struct rb_root rb_root;
        /* for passing backref nodes to btrfs_reloc_cow_block */
        struct backref_node *path[BTRFS_MAX_LEVEL];
        /*
         * list of blocks that have been cowed but some block
         * pointers in upper level blocks may not reflect the
         * new location
         */
        struct list_head pending[BTRFS_MAX_LEVEL];
        /* list of backref nodes with no child node */
        struct list_head leaves;
        /* list of blocks that have been cowed in current transaction */
        struct list_head changed;
        /* list of detached backref node. */
        struct list_head detached;

        /* transid the cache was last brought up to date for;
         * 0 means empty/unused (see update_backref_cache()) */
        u64 last_trans;

        /* allocation counters for nodes/edges, checked against zero
         * in backref_cache_cleanup() */
        int nr_nodes;
        int nr_edges;
};
111
/*
 * map address of tree root to tree
 */
struct mapping_node {
        struct rb_node rb_node;
        u64 bytenr;     /* start of the tree root block, the rb-tree key */
        void *data;     /* the struct btrfs_root this entry maps to */
};

/* rb-tree of mapping_nodes, protected by @lock */
struct mapping_tree {
        struct rb_root rb_root;
        spinlock_t lock;
};
125
/*
 * present a tree block to process
 */
struct tree_block {
        struct rb_node rb_node;
        u64 bytenr;             /* block start, the rb-tree key */
        struct btrfs_key key;   /* key of the block; valid once key_ready */
        unsigned int level:8;   /* tree level of the block */
        unsigned int key_ready:1;       /* 1 once @key has been filled in */
};

/* maximum number of file extents tracked in one cluster */
#define MAX_EXTENTS 128

/*
 * a run of file extents handled together; boundary[] presumably holds
 * the start of each extent in [start, end] — users of this struct are
 * outside this chunk, confirm against them
 */
struct file_extent_cluster {
        u64 start;
        u64 end;
        u64 boundary[MAX_EXTENTS];
        unsigned int nr;        /* number of used entries in boundary[] */
};
145
struct reloc_control {
        /* block group to relocate */
        struct btrfs_block_group_cache *block_group;
        /* extent tree */
        struct btrfs_root *extent_root;
        /* inode for moving data */
        struct inode *data_inode;

        struct btrfs_block_rsv *block_rsv;

        struct backref_cache backref_cache;

        struct file_extent_cluster cluster;
        /* tree blocks have been processed */
        struct extent_io_tree processed_blocks;
        /* map start of tree root to corresponding reloc tree */
        struct mapping_tree reloc_root_tree;
        /* list of reloc trees */
        struct list_head reloc_roots;
        /* size of metadata reservation for merging reloc trees */
        u64 merging_rsv_size;
        /* size of relocated tree nodes */
        u64 nodes_relocated;
        /* reserved size for block group relocation*/
        u64 reserved_bytes;

        /* presumably the resume point / progress of the extent scan —
         * used outside this chunk */
        u64 search_start;
        u64 extents_found;

        /* current stage: MOVE_DATA_EXTENTS or UPDATE_DATA_PTRS */
        unsigned int stage:8;
        unsigned int create_reloc_tree:1;
        unsigned int merge_reloc_tree:1;
        unsigned int found_file_extent:1;
};

/* stages of data relocation */
#define MOVE_DATA_EXTENTS       0
#define UPDATE_DATA_PTRS        1
184
185 static void remove_backref_node(struct backref_cache *cache,
186                                 struct backref_node *node);
187 static void __mark_block_processed(struct reloc_control *rc,
188                                    struct backref_node *node);
189
190 static void mapping_tree_init(struct mapping_tree *tree)
191 {
192         tree->rb_root = RB_ROOT;
193         spin_lock_init(&tree->lock);
194 }
195
196 static void backref_cache_init(struct backref_cache *cache)
197 {
198         int i;
199         cache->rb_root = RB_ROOT;
200         for (i = 0; i < BTRFS_MAX_LEVEL; i++)
201                 INIT_LIST_HEAD(&cache->pending[i]);
202         INIT_LIST_HEAD(&cache->changed);
203         INIT_LIST_HEAD(&cache->detached);
204         INIT_LIST_HEAD(&cache->leaves);
205 }
206
/*
 * Release everything left in a backref cache and verify it really ended
 * up empty.  Detached nodes are removed first, then the remaining graph
 * is torn down via the leaves list; remove_backref_node() releases the
 * connected upper levels as their last cached child goes away.
 */
static void backref_cache_cleanup(struct backref_cache *cache)
{
        struct backref_node *node;
        int i;

        while (!list_empty(&cache->detached)) {
                node = list_entry(cache->detached.next,
                                  struct backref_node, list);
                remove_backref_node(cache, node);
        }

        while (!list_empty(&cache->leaves)) {
                node = list_entry(cache->leaves.next,
                                  struct backref_node, lower);
                remove_backref_node(cache, node);
        }

        cache->last_trans = 0;

        /* after the loops above nothing may remain in any list or tree */
        for (i = 0; i < BTRFS_MAX_LEVEL; i++)
                ASSERT(list_empty(&cache->pending[i]));
        ASSERT(list_empty(&cache->changed));
        ASSERT(list_empty(&cache->detached));
        ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
        ASSERT(!cache->nr_nodes);
        ASSERT(!cache->nr_edges);
}
234
235 static struct backref_node *alloc_backref_node(struct backref_cache *cache)
236 {
237         struct backref_node *node;
238
239         node = kzalloc(sizeof(*node), GFP_NOFS);
240         if (node) {
241                 INIT_LIST_HEAD(&node->list);
242                 INIT_LIST_HEAD(&node->upper);
243                 INIT_LIST_HEAD(&node->lower);
244                 RB_CLEAR_NODE(&node->rb_node);
245                 cache->nr_nodes++;
246         }
247         return node;
248 }
249
250 static void free_backref_node(struct backref_cache *cache,
251                               struct backref_node *node)
252 {
253         if (node) {
254                 cache->nr_nodes--;
255                 kfree(node);
256         }
257 }
258
259 static struct backref_edge *alloc_backref_edge(struct backref_cache *cache)
260 {
261         struct backref_edge *edge;
262
263         edge = kzalloc(sizeof(*edge), GFP_NOFS);
264         if (edge)
265                 cache->nr_edges++;
266         return edge;
267 }
268
269 static void free_backref_edge(struct backref_cache *cache,
270                               struct backref_edge *edge)
271 {
272         if (edge) {
273                 cache->nr_edges--;
274                 kfree(edge);
275         }
276 }
277
278 static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
279                                    struct rb_node *node)
280 {
281         struct rb_node **p = &root->rb_node;
282         struct rb_node *parent = NULL;
283         struct tree_entry *entry;
284
285         while (*p) {
286                 parent = *p;
287                 entry = rb_entry(parent, struct tree_entry, rb_node);
288
289                 if (bytenr < entry->bytenr)
290                         p = &(*p)->rb_left;
291                 else if (bytenr > entry->bytenr)
292                         p = &(*p)->rb_right;
293                 else
294                         return parent;
295         }
296
297         rb_link_node(node, parent, p);
298         rb_insert_color(node, root);
299         return NULL;
300 }
301
302 static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
303 {
304         struct rb_node *n = root->rb_node;
305         struct tree_entry *entry;
306
307         while (n) {
308                 entry = rb_entry(n, struct tree_entry, rb_node);
309
310                 if (bytenr < entry->bytenr)
311                         n = n->rb_left;
312                 else if (bytenr > entry->bytenr)
313                         n = n->rb_right;
314                 else
315                         return n;
316         }
317         return NULL;
318 }
319
320 static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
321 {
322
323         struct btrfs_fs_info *fs_info = NULL;
324         struct backref_node *bnode = rb_entry(rb_node, struct backref_node,
325                                               rb_node);
326         if (bnode->root)
327                 fs_info = bnode->root->fs_info;
328         btrfs_panic(fs_info, errno,
329                     "Inconsistency in backref cache found at offset %llu",
330                     bytenr);
331 }
332
333 /*
334  * walk up backref nodes until reach node presents tree root
335  */
336 static struct backref_node *walk_up_backref(struct backref_node *node,
337                                             struct backref_edge *edges[],
338                                             int *index)
339 {
340         struct backref_edge *edge;
341         int idx = *index;
342
343         while (!list_empty(&node->upper)) {
344                 edge = list_entry(node->upper.next,
345                                   struct backref_edge, list[LOWER]);
346                 edges[idx++] = edge;
347                 node = edge->node[UPPER];
348         }
349         BUG_ON(node->detached);
350         *index = idx;
351         return node;
352 }
353
/*
 * walk down backref nodes to find start of next reference path
 *
 * @edges:  stack of edges recorded on the way up by walk_up_backref()
 * @index:  in/out depth of that stack
 *
 * Pops edges whose lower node has no further parent edge, then advances
 * the deepest remaining edge to the next parent of its lower node and
 * returns that parent (upper) node.  Returns NULL and resets *index to 0
 * once all paths have been exhausted.
 */
static struct backref_node *walk_down_backref(struct backref_edge *edges[],
                                              int *index)
{
        struct backref_edge *edge;
        struct backref_node *lower;
        int idx = *index;

        while (idx > 0) {
                edge = edges[idx - 1];
                lower = edge->node[LOWER];
                /* last parent of this lower node: pop and retry above */
                if (list_is_last(&edge->list[LOWER], &lower->upper)) {
                        idx--;
                        continue;
                }
                /* step sideways to the lower node's next parent edge */
                edge = list_entry(edge->list[LOWER].next,
                                  struct backref_edge, list[LOWER]);
                edges[idx - 1] = edge;
                *index = idx;
                return edge->node[UPPER];
        }
        *index = 0;
        return NULL;
}
380
381 static void unlock_node_buffer(struct backref_node *node)
382 {
383         if (node->locked) {
384                 btrfs_tree_unlock(node->eb);
385                 node->locked = 0;
386         }
387 }
388
389 static void drop_node_buffer(struct backref_node *node)
390 {
391         if (node->eb) {
392                 unlock_node_buffer(node);
393                 free_extent_buffer(node->eb);
394                 node->eb = NULL;
395         }
396 }
397
/*
 * Unlink @node from all cache lists and the rb-tree and free it.
 * The node must no longer be referenced by any upper level block.
 */
static void drop_backref_node(struct backref_cache *tree,
                              struct backref_node *node)
{
        BUG_ON(!list_empty(&node->upper));

        drop_node_buffer(node);
        list_del(&node->list);
        list_del(&node->lower);
        /* the node may never have been inserted into the rb-tree */
        if (!RB_EMPTY_NODE(&node->rb_node))
                rb_erase(&node->rb_node, &tree->rb_root);
        free_backref_node(tree, node);
}
410
/*
 * remove a backref node from the backref cache
 *
 * All edges to upper level nodes are released.  An upper node that is
 * not in the rb-tree is removed along with this node; an upper node
 * left without any cached child becomes a leaf and is moved onto the
 * cache's leaves list.
 */
static void remove_backref_node(struct backref_cache *cache,
                                struct backref_node *node)
{
        struct backref_node *upper;
        struct backref_edge *edge;

        if (!node)
                return;

        /* only lowest or detached nodes may be removed directly */
        BUG_ON(!node->lowest && !node->detached);
        while (!list_empty(&node->upper)) {
                edge = list_entry(node->upper.next, struct backref_edge,
                                  list[LOWER]);
                upper = edge->node[UPPER];
                list_del(&edge->list[LOWER]);
                list_del(&edge->list[UPPER]);
                free_backref_edge(cache, edge);

                if (RB_EMPTY_NODE(&upper->rb_node)) {
                        /*
                         * upper was never inserted into the rb-tree:
                         * drop the current node and continue removal
                         * with upper, which is now a lowest node
                         */
                        BUG_ON(!list_empty(&node->upper));
                        drop_backref_node(cache, node);
                        node = upper;
                        node->lowest = 1;
                        continue;
                }
                /*
                 * add the node to leaf node list if no other
                 * child block cached.
                 */
                if (list_empty(&upper->lower)) {
                        list_add_tail(&upper->lower, &cache->leaves);
                        upper->lowest = 1;
                }
        }

        drop_backref_node(cache, node);
}
451
/*
 * Re-index @node in the cache's rb-tree under its new byte number.
 * Panics via backref_tree_panic() if another node already occupies
 * the new location.
 */
static void update_backref_node(struct backref_cache *cache,
                                struct backref_node *node, u64 bytenr)
{
        struct rb_node *rb_node;
        rb_erase(&node->rb_node, &cache->rb_root);
        node->bytenr = bytenr;
        rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
        if (rb_node)
                backref_tree_panic(rb_node, -EEXIST, bytenr);
}
462
/*
 * update backref cache after a transaction commit
 *
 * Returns 0 when the cache is empty or already up to date for @trans.
 * Returns 1 when the cache was built in an earlier transaction and had
 * to be refreshed: detached nodes are dropped (the committed extent
 * tree made them stale) and changed/pending nodes are re-indexed under
 * their post-COW block numbers.
 */
static int update_backref_cache(struct btrfs_trans_handle *trans,
                                struct backref_cache *cache)
{
        struct backref_node *node;
        int level = 0;

        /* empty cache: just record the current transaction */
        if (cache->last_trans == 0) {
                cache->last_trans = trans->transid;
                return 0;
        }

        if (cache->last_trans == trans->transid)
                return 0;

        /*
         * detached nodes are used to avoid unnecessary backref
         * lookup. transaction commit changes the extent tree.
         * so the detached nodes are no longer useful.
         */
        while (!list_empty(&cache->detached)) {
                node = list_entry(cache->detached.next,
                                  struct backref_node, list);
                remove_backref_node(cache, node);
        }

        while (!list_empty(&cache->changed)) {
                node = list_entry(cache->changed.next,
                                  struct backref_node, list);
                list_del_init(&node->list);
                BUG_ON(node->pending);
                update_backref_node(cache, node, node->new_bytenr);
        }

        /*
         * some nodes can be left in the pending list if there were
         * errors during processing the pending nodes.
         */
        for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
                list_for_each_entry(node, &cache->pending[level], list) {
                        BUG_ON(!node->pending);
                        if (node->bytenr == node->new_bytenr)
                                continue;
                        update_backref_node(cache, node, node->new_bytenr);
                }
        }

        cache->last_trans = 0;
        return 1;
}
515
516
/*
 * Decide whether a backref node for @root's tree root is useful for
 * relocation.  Returns 1 (ignore) for a reference-counted tree that
 * already has a reloc tree created before the current transaction;
 * returns 0 for cow-only trees, trees without a reloc tree, or reloc
 * trees created in the previous transaction.
 */
static int should_ignore_root(struct btrfs_root *root)
{
        struct btrfs_root *reloc_root;

        if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
                return 0;

        reloc_root = root->reloc_root;
        if (!reloc_root)
                return 0;

        if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
            root->fs_info->running_transaction->transid - 1)
                return 0;
        /*
         * if there is reloc tree and it was created in previous
         * transaction backref lookup can find the reloc tree,
         * so backref node for the fs tree root is useless for
         * relocation.
         */
        return 1;
}
539 /*
540  * find reloc tree by address of tree root
541  */
542 static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
543                                           u64 bytenr)
544 {
545         struct rb_node *rb_node;
546         struct mapping_node *node;
547         struct btrfs_root *root = NULL;
548
549         spin_lock(&rc->reloc_root_tree.lock);
550         rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
551         if (rb_node) {
552                 node = rb_entry(rb_node, struct mapping_node, rb_node);
553                 root = (struct btrfs_root *)node->data;
554         }
555         spin_unlock(&rc->reloc_root_tree.lock);
556         return root;
557 }
558
559 static int is_cowonly_root(u64 root_objectid)
560 {
561         if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
562             root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
563             root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
564             root_objectid == BTRFS_DEV_TREE_OBJECTID ||
565             root_objectid == BTRFS_TREE_LOG_OBJECTID ||
566             root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
567             root_objectid == BTRFS_UUID_TREE_OBJECTID ||
568             root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
569             root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
570                 return 1;
571         return 0;
572 }
573
574 static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
575                                         u64 root_objectid)
576 {
577         struct btrfs_key key;
578
579         key.objectid = root_objectid;
580         key.type = BTRFS_ROOT_ITEM_KEY;
581         if (is_cowonly_root(root_objectid))
582                 key.offset = 0;
583         else
584                 key.offset = (u64)-1;
585
586         return btrfs_get_fs_root(fs_info, &key, false);
587 }
588
/*
 * Locate the inline backref area of the extent item at @slot in @leaf.
 *
 * On success, *ptr/*end are set to the byte range [ptr, end) that holds
 * the inline references (skipping the btrfs_tree_block_info that only
 * EXTENT_ITEM_KEY items carry) and 0 is returned.  Returns 1 when the
 * item has no inline references; the WARN_ONs flag items smaller than
 * even their fixed-size header, which indicates corruption.
 */
static noinline_for_stack
int find_inline_backref(struct extent_buffer *leaf, int slot,
                        unsigned long *ptr, unsigned long *end)
{
        struct btrfs_key key;
        struct btrfs_extent_item *ei;
        struct btrfs_tree_block_info *bi;
        u32 item_size;

        btrfs_item_key_to_cpu(leaf, &key, slot);

        item_size = btrfs_item_size_nr(leaf, slot);
        ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
        /* callers only pass tree block extents */
        WARN_ON(!(btrfs_extent_flags(leaf, ei) &
                  BTRFS_EXTENT_FLAG_TREE_BLOCK));

        if (key.type == BTRFS_EXTENT_ITEM_KEY &&
            item_size <= sizeof(*ei) + sizeof(*bi)) {
                WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
                return 1;
        }
        if (key.type == BTRFS_METADATA_ITEM_KEY &&
            item_size <= sizeof(*ei)) {
                WARN_ON(item_size < sizeof(*ei));
                return 1;
        }

        if (key.type == BTRFS_EXTENT_ITEM_KEY) {
                /* skip the tree block info to reach the inline refs */
                bi = (struct btrfs_tree_block_info *)(ei + 1);
                *ptr = (unsigned long)(bi + 1);
        } else {
                *ptr = (unsigned long)(ei + 1);
        }
        *end = (unsigned long)ei + item_size;
        return 0;
}
625
626 /*
627  * build backref tree for a given tree block. root of the backref tree
628  * corresponds the tree block, leaves of the backref tree correspond
629  * roots of b-trees that reference the tree block.
630  *
631  * the basic idea of this function is check backrefs of a given block
632  * to find upper level blocks that reference the block, and then check
633  * backrefs of these upper level blocks recursively. the recursion stop
634  * when tree root is reached or backrefs for the block is cached.
635  *
636  * NOTE: if we find backrefs for a block are cached, we know backrefs
637  * for all upper level blocks that directly/indirectly reference the
638  * block are also cached.
639  */
640 static noinline_for_stack
641 struct backref_node *build_backref_tree(struct reloc_control *rc,
642                                         struct btrfs_key *node_key,
643                                         int level, u64 bytenr)
644 {
645         struct backref_cache *cache = &rc->backref_cache;
646         struct btrfs_path *path1;
647         struct btrfs_path *path2;
648         struct extent_buffer *eb;
649         struct btrfs_root *root;
650         struct backref_node *cur;
651         struct backref_node *upper;
652         struct backref_node *lower;
653         struct backref_node *node = NULL;
654         struct backref_node *exist = NULL;
655         struct backref_edge *edge;
656         struct rb_node *rb_node;
657         struct btrfs_key key;
658         unsigned long end;
659         unsigned long ptr;
660         LIST_HEAD(list);
661         LIST_HEAD(useless);
662         int cowonly;
663         int ret;
664         int err = 0;
665         bool need_check = true;
666
667         path1 = btrfs_alloc_path();
668         path2 = btrfs_alloc_path();
669         if (!path1 || !path2) {
670                 err = -ENOMEM;
671                 goto out;
672         }
673         path1->reada = READA_FORWARD;
674         path2->reada = READA_FORWARD;
675
676         node = alloc_backref_node(cache);
677         if (!node) {
678                 err = -ENOMEM;
679                 goto out;
680         }
681
682         node->bytenr = bytenr;
683         node->level = level;
684         node->lowest = 1;
685         cur = node;
686 again:
687         end = 0;
688         ptr = 0;
689         key.objectid = cur->bytenr;
690         key.type = BTRFS_METADATA_ITEM_KEY;
691         key.offset = (u64)-1;
692
693         path1->search_commit_root = 1;
694         path1->skip_locking = 1;
695         ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
696                                 0, 0);
697         if (ret < 0) {
698                 err = ret;
699                 goto out;
700         }
701         ASSERT(ret);
702         ASSERT(path1->slots[0]);
703
704         path1->slots[0]--;
705
706         WARN_ON(cur->checked);
707         if (!list_empty(&cur->upper)) {
708                 /*
709                  * the backref was added previously when processing
710                  * backref of type BTRFS_TREE_BLOCK_REF_KEY
711                  */
712                 ASSERT(list_is_singular(&cur->upper));
713                 edge = list_entry(cur->upper.next, struct backref_edge,
714                                   list[LOWER]);
715                 ASSERT(list_empty(&edge->list[UPPER]));
716                 exist = edge->node[UPPER];
717                 /*
718                  * add the upper level block to pending list if we need
719                  * check its backrefs
720                  */
721                 if (!exist->checked)
722                         list_add_tail(&edge->list[UPPER], &list);
723         } else {
724                 exist = NULL;
725         }
726
727         while (1) {
728                 cond_resched();
729                 eb = path1->nodes[0];
730
731                 if (ptr >= end) {
732                         if (path1->slots[0] >= btrfs_header_nritems(eb)) {
733                                 ret = btrfs_next_leaf(rc->extent_root, path1);
734                                 if (ret < 0) {
735                                         err = ret;
736                                         goto out;
737                                 }
738                                 if (ret > 0)
739                                         break;
740                                 eb = path1->nodes[0];
741                         }
742
743                         btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
744                         if (key.objectid != cur->bytenr) {
745                                 WARN_ON(exist);
746                                 break;
747                         }
748
749                         if (key.type == BTRFS_EXTENT_ITEM_KEY ||
750                             key.type == BTRFS_METADATA_ITEM_KEY) {
751                                 ret = find_inline_backref(eb, path1->slots[0],
752                                                           &ptr, &end);
753                                 if (ret)
754                                         goto next;
755                         }
756                 }
757
758                 if (ptr < end) {
759                         /* update key for inline back ref */
760                         struct btrfs_extent_inline_ref *iref;
761                         int type;
762                         iref = (struct btrfs_extent_inline_ref *)ptr;
763                         type = btrfs_get_extent_inline_ref_type(eb, iref,
764                                                         BTRFS_REF_TYPE_BLOCK);
765                         if (type == BTRFS_REF_TYPE_INVALID) {
766                                 err = -EUCLEAN;
767                                 goto out;
768                         }
769                         key.type = type;
770                         key.offset = btrfs_extent_inline_ref_offset(eb, iref);
771
772                         WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
773                                 key.type != BTRFS_SHARED_BLOCK_REF_KEY);
774                 }
775
776                 if (exist &&
777                     ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
778                       exist->owner == key.offset) ||
779                      (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
780                       exist->bytenr == key.offset))) {
781                         exist = NULL;
782                         goto next;
783                 }
784
785                 ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY);
786                 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
787                         if (key.objectid == key.offset) {
788                                 /*
789                                  * only root blocks of reloc trees use
790                                  * backref of this type.
791                                  */
792                                 root = find_reloc_root(rc, cur->bytenr);
793                                 ASSERT(root);
794                                 cur->root = root;
795                                 break;
796                         }
797
798                         edge = alloc_backref_edge(cache);
799                         if (!edge) {
800                                 err = -ENOMEM;
801                                 goto out;
802                         }
803                         rb_node = tree_search(&cache->rb_root, key.offset);
804                         if (!rb_node) {
805                                 upper = alloc_backref_node(cache);
806                                 if (!upper) {
807                                         free_backref_edge(cache, edge);
808                                         err = -ENOMEM;
809                                         goto out;
810                                 }
811                                 upper->bytenr = key.offset;
812                                 upper->level = cur->level + 1;
813                                 /*
814                                  *  backrefs for the upper level block isn't
815                                  *  cached, add the block to pending list
816                                  */
817                                 list_add_tail(&edge->list[UPPER], &list);
818                         } else {
819                                 upper = rb_entry(rb_node, struct backref_node,
820                                                  rb_node);
821                                 ASSERT(upper->checked);
822                                 INIT_LIST_HEAD(&edge->list[UPPER]);
823                         }
824                         list_add_tail(&edge->list[LOWER], &cur->upper);
825                         edge->node[LOWER] = cur;
826                         edge->node[UPPER] = upper;
827
828                         goto next;
829                 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
830                         goto next;
831                 }
832
833                 /* key.type == BTRFS_TREE_BLOCK_REF_KEY */
834                 root = read_fs_root(rc->extent_root->fs_info, key.offset);
835                 if (IS_ERR(root)) {
836                         err = PTR_ERR(root);
837                         goto out;
838                 }
839
840                 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
841                         cur->cowonly = 1;
842
843                 if (btrfs_root_level(&root->root_item) == cur->level) {
844                         /* tree root */
845                         ASSERT(btrfs_root_bytenr(&root->root_item) ==
846                                cur->bytenr);
847                         if (should_ignore_root(root))
848                                 list_add(&cur->list, &useless);
849                         else
850                                 cur->root = root;
851                         break;
852                 }
853
854                 level = cur->level + 1;
855
856                 /*
857                  * searching the tree to find upper level blocks
858                  * reference the block.
859                  */
860                 path2->search_commit_root = 1;
861                 path2->skip_locking = 1;
862                 path2->lowest_level = level;
863                 ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
864                 path2->lowest_level = 0;
865                 if (ret < 0) {
866                         err = ret;
867                         goto out;
868                 }
869                 if (ret > 0 && path2->slots[level] > 0)
870                         path2->slots[level]--;
871
872                 eb = path2->nodes[level];
873                 if (btrfs_node_blockptr(eb, path2->slots[level]) !=
874                     cur->bytenr) {
875                         btrfs_err(root->fs_info,
876         "couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
877                                   cur->bytenr, level - 1, root->objectid,
878                                   node_key->objectid, node_key->type,
879                                   node_key->offset);
880                         err = -ENOENT;
881                         goto out;
882                 }
883                 lower = cur;
884                 need_check = true;
885                 for (; level < BTRFS_MAX_LEVEL; level++) {
886                         if (!path2->nodes[level]) {
887                                 ASSERT(btrfs_root_bytenr(&root->root_item) ==
888                                        lower->bytenr);
889                                 if (should_ignore_root(root))
890                                         list_add(&lower->list, &useless);
891                                 else
892                                         lower->root = root;
893                                 break;
894                         }
895
896                         edge = alloc_backref_edge(cache);
897                         if (!edge) {
898                                 err = -ENOMEM;
899                                 goto out;
900                         }
901
902                         eb = path2->nodes[level];
903                         rb_node = tree_search(&cache->rb_root, eb->start);
904                         if (!rb_node) {
905                                 upper = alloc_backref_node(cache);
906                                 if (!upper) {
907                                         free_backref_edge(cache, edge);
908                                         err = -ENOMEM;
909                                         goto out;
910                                 }
911                                 upper->bytenr = eb->start;
912                                 upper->owner = btrfs_header_owner(eb);
913                                 upper->level = lower->level + 1;
914                                 if (!test_bit(BTRFS_ROOT_REF_COWS,
915                                               &root->state))
916                                         upper->cowonly = 1;
917
918                                 /*
919                                  * if we know the block isn't shared
920                                  * we can void checking its backrefs.
921                                  */
922                                 if (btrfs_block_can_be_shared(root, eb))
923                                         upper->checked = 0;
924                                 else
925                                         upper->checked = 1;
926
927                                 /*
928                                  * add the block to pending list if we
929                                  * need check its backrefs, we only do this once
930                                  * while walking up a tree as we will catch
931                                  * anything else later on.
932                                  */
933                                 if (!upper->checked && need_check) {
934                                         need_check = false;
935                                         list_add_tail(&edge->list[UPPER],
936                                                       &list);
937                                 } else {
938                                         if (upper->checked)
939                                                 need_check = true;
940                                         INIT_LIST_HEAD(&edge->list[UPPER]);
941                                 }
942                         } else {
943                                 upper = rb_entry(rb_node, struct backref_node,
944                                                  rb_node);
945                                 ASSERT(upper->checked);
946                                 INIT_LIST_HEAD(&edge->list[UPPER]);
947                                 if (!upper->owner)
948                                         upper->owner = btrfs_header_owner(eb);
949                         }
950                         list_add_tail(&edge->list[LOWER], &lower->upper);
951                         edge->node[LOWER] = lower;
952                         edge->node[UPPER] = upper;
953
954                         if (rb_node)
955                                 break;
956                         lower = upper;
957                         upper = NULL;
958                 }
959                 btrfs_release_path(path2);
960 next:
961                 if (ptr < end) {
962                         ptr += btrfs_extent_inline_ref_size(key.type);
963                         if (ptr >= end) {
964                                 WARN_ON(ptr > end);
965                                 ptr = 0;
966                                 end = 0;
967                         }
968                 }
969                 if (ptr >= end)
970                         path1->slots[0]++;
971         }
972         btrfs_release_path(path1);
973
974         cur->checked = 1;
975         WARN_ON(exist);
976
977         /* the pending list isn't empty, take the first block to process */
978         if (!list_empty(&list)) {
979                 edge = list_entry(list.next, struct backref_edge, list[UPPER]);
980                 list_del_init(&edge->list[UPPER]);
981                 cur = edge->node[UPPER];
982                 goto again;
983         }
984
985         /*
986          * everything goes well, connect backref nodes and insert backref nodes
987          * into the cache.
988          */
989         ASSERT(node->checked);
990         cowonly = node->cowonly;
991         if (!cowonly) {
992                 rb_node = tree_insert(&cache->rb_root, node->bytenr,
993                                       &node->rb_node);
994                 if (rb_node)
995                         backref_tree_panic(rb_node, -EEXIST, node->bytenr);
996                 list_add_tail(&node->lower, &cache->leaves);
997         }
998
999         list_for_each_entry(edge, &node->upper, list[LOWER])
1000                 list_add_tail(&edge->list[UPPER], &list);
1001
1002         while (!list_empty(&list)) {
1003                 edge = list_entry(list.next, struct backref_edge, list[UPPER]);
1004                 list_del_init(&edge->list[UPPER]);
1005                 upper = edge->node[UPPER];
1006                 if (upper->detached) {
1007                         list_del(&edge->list[LOWER]);
1008                         lower = edge->node[LOWER];
1009                         free_backref_edge(cache, edge);
1010                         if (list_empty(&lower->upper))
1011                                 list_add(&lower->list, &useless);
1012                         continue;
1013                 }
1014
1015                 if (!RB_EMPTY_NODE(&upper->rb_node)) {
1016                         if (upper->lowest) {
1017                                 list_del_init(&upper->lower);
1018                                 upper->lowest = 0;
1019                         }
1020
1021                         list_add_tail(&edge->list[UPPER], &upper->lower);
1022                         continue;
1023                 }
1024
1025                 if (!upper->checked) {
1026                         /*
1027                          * Still want to blow up for developers since this is a
1028                          * logic bug.
1029                          */
1030                         ASSERT(0);
1031                         err = -EINVAL;
1032                         goto out;
1033                 }
1034                 if (cowonly != upper->cowonly) {
1035                         ASSERT(0);
1036                         err = -EINVAL;
1037                         goto out;
1038                 }
1039
1040                 if (!cowonly) {
1041                         rb_node = tree_insert(&cache->rb_root, upper->bytenr,
1042                                               &upper->rb_node);
1043                         if (rb_node)
1044                                 backref_tree_panic(rb_node, -EEXIST,
1045                                                    upper->bytenr);
1046                 }
1047
1048                 list_add_tail(&edge->list[UPPER], &upper->lower);
1049
1050                 list_for_each_entry(edge, &upper->upper, list[LOWER])
1051                         list_add_tail(&edge->list[UPPER], &list);
1052         }
1053         /*
1054          * process useless backref nodes. backref nodes for tree leaves
1055          * are deleted from the cache. backref nodes for upper level
1056          * tree blocks are left in the cache to avoid unnecessary backref
1057          * lookup.
1058          */
1059         while (!list_empty(&useless)) {
1060                 upper = list_entry(useless.next, struct backref_node, list);
1061                 list_del_init(&upper->list);
1062                 ASSERT(list_empty(&upper->upper));
1063                 if (upper == node)
1064                         node = NULL;
1065                 if (upper->lowest) {
1066                         list_del_init(&upper->lower);
1067                         upper->lowest = 0;
1068                 }
1069                 while (!list_empty(&upper->lower)) {
1070                         edge = list_entry(upper->lower.next,
1071                                           struct backref_edge, list[UPPER]);
1072                         list_del(&edge->list[UPPER]);
1073                         list_del(&edge->list[LOWER]);
1074                         lower = edge->node[LOWER];
1075                         free_backref_edge(cache, edge);
1076
1077                         if (list_empty(&lower->upper))
1078                                 list_add(&lower->list, &useless);
1079                 }
1080                 __mark_block_processed(rc, upper);
1081                 if (upper->level > 0) {
1082                         list_add(&upper->list, &cache->detached);
1083                         upper->detached = 1;
1084                 } else {
1085                         rb_erase(&upper->rb_node, &cache->rb_root);
1086                         free_backref_node(cache, upper);
1087                 }
1088         }
1089 out:
1090         btrfs_free_path(path1);
1091         btrfs_free_path(path2);
1092         if (err) {
1093                 while (!list_empty(&useless)) {
1094                         lower = list_entry(useless.next,
1095                                            struct backref_node, list);
1096                         list_del_init(&lower->list);
1097                 }
1098                 while (!list_empty(&list)) {
1099                         edge = list_first_entry(&list, struct backref_edge,
1100                                                 list[UPPER]);
1101                         list_del(&edge->list[UPPER]);
1102                         list_del(&edge->list[LOWER]);
1103                         lower = edge->node[LOWER];
1104                         upper = edge->node[UPPER];
1105                         free_backref_edge(cache, edge);
1106
1107                         /*
1108                          * Lower is no longer linked to any upper backref nodes
1109                          * and isn't in the cache, we can free it ourselves.
1110                          */
1111                         if (list_empty(&lower->upper) &&
1112                             RB_EMPTY_NODE(&lower->rb_node))
1113                                 list_add(&lower->list, &useless);
1114
1115                         if (!RB_EMPTY_NODE(&upper->rb_node))
1116                                 continue;
1117
1118                         /* Add this guy's upper edges to the list to process */
1119                         list_for_each_entry(edge, &upper->upper, list[LOWER])
1120                                 list_add_tail(&edge->list[UPPER], &list);
1121                         if (list_empty(&upper->upper))
1122                                 list_add(&upper->list, &useless);
1123                 }
1124
1125                 while (!list_empty(&useless)) {
1126                         lower = list_entry(useless.next,
1127                                            struct backref_node, list);
1128                         list_del_init(&lower->list);
1129                         if (lower == node)
1130                                 node = NULL;
1131                         free_backref_node(cache, lower);
1132                 }
1133
1134                 free_backref_node(cache, node);
1135                 return ERR_PTR(err);
1136         }
1137         ASSERT(!node || !node->detached);
1138         return node;
1139 }
1140
1141 /*
1142  * helper to add backref node for the newly created snapshot.
1143  * the backref node is created by cloning backref node that
1144  * corresponds to root of source tree
1145  */
1146 static int clone_backref_node(struct btrfs_trans_handle *trans,
1147                               struct reloc_control *rc,
1148                               struct btrfs_root *src,
1149                               struct btrfs_root *dest)
1150 {
1151         struct btrfs_root *reloc_root = src->reloc_root;
1152         struct backref_cache *cache = &rc->backref_cache;
1153         struct backref_node *node = NULL;
1154         struct backref_node *new_node;
1155         struct backref_edge *edge;
1156         struct backref_edge *new_edge;
1157         struct rb_node *rb_node;
1158
1159         if (cache->last_trans > 0)
1160                 update_backref_cache(trans, cache);
1161
1162         rb_node = tree_search(&cache->rb_root, src->commit_root->start);
1163         if (rb_node) {
1164                 node = rb_entry(rb_node, struct backref_node, rb_node);
1165                 if (node->detached)
1166                         node = NULL;
1167                 else
1168                         BUG_ON(node->new_bytenr != reloc_root->node->start);
1169         }
1170
1171         if (!node) {
1172                 rb_node = tree_search(&cache->rb_root,
1173                                       reloc_root->commit_root->start);
1174                 if (rb_node) {
1175                         node = rb_entry(rb_node, struct backref_node,
1176                                         rb_node);
1177                         BUG_ON(node->detached);
1178                 }
1179         }
1180
1181         if (!node)
1182                 return 0;
1183
1184         new_node = alloc_backref_node(cache);
1185         if (!new_node)
1186                 return -ENOMEM;
1187
1188         new_node->bytenr = dest->node->start;
1189         new_node->level = node->level;
1190         new_node->lowest = node->lowest;
1191         new_node->checked = 1;
1192         new_node->root = dest;
1193
1194         if (!node->lowest) {
1195                 list_for_each_entry(edge, &node->lower, list[UPPER]) {
1196                         new_edge = alloc_backref_edge(cache);
1197                         if (!new_edge)
1198                                 goto fail;
1199
1200                         new_edge->node[UPPER] = new_node;
1201                         new_edge->node[LOWER] = edge->node[LOWER];
1202                         list_add_tail(&new_edge->list[UPPER],
1203                                       &new_node->lower);
1204                 }
1205         } else {
1206                 list_add_tail(&new_node->lower, &cache->leaves);
1207         }
1208
1209         rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
1210                               &new_node->rb_node);
1211         if (rb_node)
1212                 backref_tree_panic(rb_node, -EEXIST, new_node->bytenr);
1213
1214         if (!new_node->lowest) {
1215                 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
1216                         list_add_tail(&new_edge->list[LOWER],
1217                                       &new_edge->node[LOWER]->upper);
1218                 }
1219         }
1220         return 0;
1221 fail:
1222         while (!list_empty(&new_node->lower)) {
1223                 new_edge = list_entry(new_node->lower.next,
1224                                       struct backref_edge, list[UPPER]);
1225                 list_del(&new_edge->list[UPPER]);
1226                 free_backref_edge(cache, new_edge);
1227         }
1228         free_backref_node(cache, new_node);
1229         return -ENOMEM;
1230 }
1231
1232 /*
1233  * helper to add 'address of tree root -> reloc tree' mapping
1234  */
1235 static int __must_check __add_reloc_root(struct btrfs_root *root)
1236 {
1237         struct btrfs_fs_info *fs_info = root->fs_info;
1238         struct rb_node *rb_node;
1239         struct mapping_node *node;
1240         struct reloc_control *rc = fs_info->reloc_ctl;
1241
1242         node = kmalloc(sizeof(*node), GFP_NOFS);
1243         if (!node)
1244                 return -ENOMEM;
1245
1246         node->bytenr = root->node->start;
1247         node->data = root;
1248
1249         spin_lock(&rc->reloc_root_tree.lock);
1250         rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1251                               node->bytenr, &node->rb_node);
1252         spin_unlock(&rc->reloc_root_tree.lock);
1253         if (rb_node) {
1254                 btrfs_panic(fs_info, -EEXIST,
1255                             "Duplicate root found for start=%llu while inserting into relocation tree",
1256                             node->bytenr);
1257         }
1258
1259         list_add_tail(&root->root_list, &rc->reloc_roots);
1260         return 0;
1261 }
1262
1263 /*
1264  * helper to delete the 'address of tree root -> reloc tree'
1265  * mapping
1266  */
1267 static void __del_reloc_root(struct btrfs_root *root)
1268 {
1269         struct btrfs_fs_info *fs_info = root->fs_info;
1270         struct rb_node *rb_node;
1271         struct mapping_node *node = NULL;
1272         struct reloc_control *rc = fs_info->reloc_ctl;
1273
1274         spin_lock(&rc->reloc_root_tree.lock);
1275         rb_node = tree_search(&rc->reloc_root_tree.rb_root,
1276                               root->node->start);
1277         if (rb_node) {
1278                 node = rb_entry(rb_node, struct mapping_node, rb_node);
1279                 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
1280         }
1281         spin_unlock(&rc->reloc_root_tree.lock);
1282
1283         if (!node)
1284                 return;
1285         BUG_ON((struct btrfs_root *)node->data != root);
1286
1287         spin_lock(&fs_info->trans_lock);
1288         list_del_init(&root->root_list);
1289         spin_unlock(&fs_info->trans_lock);
1290         kfree(node);
1291 }
1292
1293 /*
1294  * helper to update the 'address of tree root -> reloc tree'
1295  * mapping
1296  */
1297 static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
1298 {
1299         struct btrfs_fs_info *fs_info = root->fs_info;
1300         struct rb_node *rb_node;
1301         struct mapping_node *node = NULL;
1302         struct reloc_control *rc = fs_info->reloc_ctl;
1303
1304         spin_lock(&rc->reloc_root_tree.lock);
1305         rb_node = tree_search(&rc->reloc_root_tree.rb_root,
1306                               root->node->start);
1307         if (rb_node) {
1308                 node = rb_entry(rb_node, struct mapping_node, rb_node);
1309                 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
1310         }
1311         spin_unlock(&rc->reloc_root_tree.lock);
1312
1313         if (!node)
1314                 return 0;
1315         BUG_ON((struct btrfs_root *)node->data != root);
1316
1317         spin_lock(&rc->reloc_root_tree.lock);
1318         node->bytenr = new_bytenr;
1319         rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1320                               node->bytenr, &node->rb_node);
1321         spin_unlock(&rc->reloc_root_tree.lock);
1322         if (rb_node)
1323                 backref_tree_panic(rb_node, -EEXIST, node->bytenr);
1324         return 0;
1325 }
1326
/*
 * Create the on-disk root of a relocation tree by COWing a snapshot of
 * @root's tree and inserting a new root item with objectid
 * BTRFS_TREE_RELOC_OBJECTID and offset @objectid.
 *
 * Two callers, distinguished by whether @objectid equals @root's own
 * objectid: btrfs_init_reloc_root (snapshot the commit root of a fs
 * tree) and btrfs_reloc_post_snapshot_hook (snapshot the current node
 * of an existing reloc tree). Returns the in-memory reloc root, stamped
 * with the current transid. Failures are fatal (BUG_ON) as in the rest
 * of this file.
 */
static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
					struct btrfs_root *root, u64 objectid)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb;
	struct btrfs_root_item *root_item;
	struct btrfs_key root_key;
	int ret;

	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
	BUG_ON(!root_item);

	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = objectid;

	if (root->root_key.objectid == objectid) {
		u64 commit_root_gen;

		/* called by btrfs_init_reloc_root */
		ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
				      BTRFS_TREE_RELOC_OBJECTID);
		BUG_ON(ret);
		/*
		 * Set the last_snapshot field to the generation of the commit
		 * root - like this ctree.c:btrfs_block_can_be_shared() behaves
		 * correctly (returns true) when the relocation root is created
		 * either inside the critical section of a transaction commit
		 * (through transaction.c:qgroup_account_snapshot()) and when
		 * it's created before the transaction commit is started.
		 */
		commit_root_gen = btrfs_header_generation(root->commit_root);
		btrfs_set_root_last_snapshot(&root->root_item, commit_root_gen);
	} else {
		/*
		 * called by btrfs_reloc_post_snapshot_hook.
		 * the source tree is a reloc tree, all tree blocks
		 * modified after it was created have RELOC flag
		 * set in their headers. so it's OK to not update
		 * the 'last_snapshot'.
		 */
		ret = btrfs_copy_root(trans, root, root->node, &eb,
				      BTRFS_TREE_RELOC_OBJECTID);
		BUG_ON(ret);
	}

	/* Base the reloc root item on the source, pointing at the new copy. */
	memcpy(root_item, &root->root_item, sizeof(*root_item));
	btrfs_set_root_bytenr(root_item, eb->start);
	btrfs_set_root_level(root_item, btrfs_header_level(eb));
	btrfs_set_root_generation(root_item, trans->transid);

	if (root->root_key.objectid == objectid) {
		/* Fresh reloc tree: no references yet, no drop in progress. */
		btrfs_set_root_refs(root_item, 0);
		memset(&root_item->drop_progress, 0,
		       sizeof(struct btrfs_disk_key));
		root_item->drop_level = 0;
	}

	/* btrfs_copy_root() returned eb locked with a reference held. */
	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	ret = btrfs_insert_root(trans, fs_info->tree_root,
				&root_key, root_item);
	BUG_ON(ret);
	kfree(root_item);

	/* Read the freshly inserted root back as an in-memory btrfs_root. */
	reloc_root = btrfs_read_fs_root(fs_info->tree_root, &root_key);
	BUG_ON(IS_ERR(reloc_root));
	reloc_root->last_trans = trans->transid;
	return reloc_root;
}
1399
1400 /*
1401  * create reloc tree for a given fs tree. reloc tree is just a
1402  * snapshot of the fs tree with special root objectid.
1403  */
1404 int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
1405                           struct btrfs_root *root)
1406 {
1407         struct btrfs_fs_info *fs_info = root->fs_info;
1408         struct btrfs_root *reloc_root;
1409         struct reloc_control *rc = fs_info->reloc_ctl;
1410         struct btrfs_block_rsv *rsv;
1411         int clear_rsv = 0;
1412         int ret;
1413
1414         if (root->reloc_root) {
1415                 reloc_root = root->reloc_root;
1416                 reloc_root->last_trans = trans->transid;
1417                 return 0;
1418         }
1419
1420         if (!rc || !rc->create_reloc_tree ||
1421             root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1422                 return 0;
1423
1424         if (!trans->reloc_reserved) {
1425                 rsv = trans->block_rsv;
1426                 trans->block_rsv = rc->block_rsv;
1427                 clear_rsv = 1;
1428         }
1429         reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
1430         if (clear_rsv)
1431                 trans->block_rsv = rsv;
1432
1433         ret = __add_reloc_root(reloc_root);
1434         BUG_ON(ret < 0);
1435         root->reloc_root = reloc_root;
1436         return 0;
1437 }
1438
1439 /*
1440  * update root item of reloc tree
1441  */
1442 int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
1443                             struct btrfs_root *root)
1444 {
1445         struct btrfs_fs_info *fs_info = root->fs_info;
1446         struct btrfs_root *reloc_root;
1447         struct btrfs_root_item *root_item;
1448         int ret;
1449
1450         if (!root->reloc_root)
1451                 goto out;
1452
1453         reloc_root = root->reloc_root;
1454         root_item = &reloc_root->root_item;
1455
1456         if (fs_info->reloc_ctl->merge_reloc_tree &&
1457             btrfs_root_refs(root_item) == 0) {
1458                 root->reloc_root = NULL;
1459                 __del_reloc_root(reloc_root);
1460         }
1461
1462         if (reloc_root->commit_root != reloc_root->node) {
1463                 btrfs_set_root_node(root_item, reloc_root->node);
1464                 free_extent_buffer(reloc_root->commit_root);
1465                 reloc_root->commit_root = btrfs_root_node(reloc_root);
1466         }
1467
1468         ret = btrfs_update_root(trans, fs_info->tree_root,
1469                                 &reloc_root->root_key, root_item);
1470         BUG_ON(ret);
1471
1472 out:
1473         return 0;
1474 }
1475
1476 /*
1477  * helper to find first cached inode with inode number >= objectid
1478  * in a subvolume
1479  */
1480 static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
1481 {
1482         struct rb_node *node;
1483         struct rb_node *prev;
1484         struct btrfs_inode *entry;
1485         struct inode *inode;
1486
1487         spin_lock(&root->inode_lock);
1488 again:
1489         node = root->inode_tree.rb_node;
1490         prev = NULL;
1491         while (node) {
1492                 prev = node;
1493                 entry = rb_entry(node, struct btrfs_inode, rb_node);
1494
1495                 if (objectid < btrfs_ino(entry))
1496                         node = node->rb_left;
1497                 else if (objectid > btrfs_ino(entry))
1498                         node = node->rb_right;
1499                 else
1500                         break;
1501         }
1502         if (!node) {
1503                 while (prev) {
1504                         entry = rb_entry(prev, struct btrfs_inode, rb_node);
1505                         if (objectid <= btrfs_ino(entry)) {
1506                                 node = prev;
1507                                 break;
1508                         }
1509                         prev = rb_next(prev);
1510                 }
1511         }
1512         while (node) {
1513                 entry = rb_entry(node, struct btrfs_inode, rb_node);
1514                 inode = igrab(&entry->vfs_inode);
1515                 if (inode) {
1516                         spin_unlock(&root->inode_lock);
1517                         return inode;
1518                 }
1519
1520                 objectid = btrfs_ino(entry) + 1;
1521                 if (cond_resched_lock(&root->inode_lock))
1522                         goto again;
1523
1524                 node = rb_next(node);
1525         }
1526         spin_unlock(&root->inode_lock);
1527         return NULL;
1528 }
1529
1530 static int in_block_group(u64 bytenr,
1531                           struct btrfs_block_group_cache *block_group)
1532 {
1533         if (bytenr >= block_group->key.objectid &&
1534             bytenr < block_group->key.objectid + block_group->key.offset)
1535                 return 1;
1536         return 0;
1537 }
1538
1539 /*
1540  * get new location of data
1541  */
1542 static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1543                             u64 bytenr, u64 num_bytes)
1544 {
1545         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1546         struct btrfs_path *path;
1547         struct btrfs_file_extent_item *fi;
1548         struct extent_buffer *leaf;
1549         int ret;
1550
1551         path = btrfs_alloc_path();
1552         if (!path)
1553                 return -ENOMEM;
1554
1555         bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1556         ret = btrfs_lookup_file_extent(NULL, root, path,
1557                         btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
1558         if (ret < 0)
1559                 goto out;
1560         if (ret > 0) {
1561                 ret = -ENOENT;
1562                 goto out;
1563         }
1564
1565         leaf = path->nodes[0];
1566         fi = btrfs_item_ptr(leaf, path->slots[0],
1567                             struct btrfs_file_extent_item);
1568
1569         BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1570                btrfs_file_extent_compression(leaf, fi) ||
1571                btrfs_file_extent_encryption(leaf, fi) ||
1572                btrfs_file_extent_other_encoding(leaf, fi));
1573
1574         if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1575                 ret = -EINVAL;
1576                 goto out;
1577         }
1578
1579         *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1580         ret = 0;
1581 out:
1582         btrfs_free_path(path);
1583         return ret;
1584 }
1585
1586 /*
1587  * update file extent items in the tree leaf to point to
1588  * the new locations.
1589  */
/*
 * For every regular/prealloc EXTENT_DATA item in @leaf whose disk extent
 * lives inside the block group being relocated, look up the extent's new
 * location (via rc->data_inode), rewrite disk_bytenr in place, add a data
 * backref for the new extent and drop the backref on the old one.
 *
 * Only runs during the UPDATE_DATA_PTRS stage; returns 0 otherwise/on
 * success, negative errno on failure.  Before the first item is modified a
 * failure simply stops the scan; after a modification, failures abort the
 * transaction.
 */
1590 static noinline_for_stack
1591 int replace_file_extents(struct btrfs_trans_handle *trans,
1592                          struct reloc_control *rc,
1593                          struct btrfs_root *root,
1594                          struct extent_buffer *leaf)
1595 {
1596         struct btrfs_fs_info *fs_info = root->fs_info;
1597         struct btrfs_key key;
1598         struct btrfs_file_extent_item *fi;
1599         struct inode *inode = NULL;
1600         u64 parent;
1601         u64 bytenr;
1602         u64 new_bytenr = 0;
1603         u64 num_bytes;
1604         u64 end;
1605         u32 nritems;
1606         u32 i;
1607         int ret = 0;
1608         int first = 1;
1609         int dirty = 0;
1610
1611         if (rc->stage != UPDATE_DATA_PTRS)
1612                 return 0;
1613
1614         /* reloc trees always use full backref */
1615         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1616                 parent = leaf->start;
1617         else
1618                 parent = 0;
1619
1620         nritems = btrfs_header_nritems(leaf);
1621         for (i = 0; i < nritems; i++) {
1622                 cond_resched();
1623                 btrfs_item_key_to_cpu(leaf, &key, i);
1624                 if (key.type != BTRFS_EXTENT_DATA_KEY)
1625                         continue;
1626                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1627                 if (btrfs_file_extent_type(leaf, fi) ==
1628                     BTRFS_FILE_EXTENT_INLINE)
1629                         continue;
1630                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1631                 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
                /* bytenr == 0 marks a hole: nothing to relocate */
1632                 if (bytenr == 0)
1633                         continue;
1634                 if (!in_block_group(bytenr, rc->block_group))
1635                         continue;
1636
1637                 /*
1638                  * if we are modifying block in fs tree, wait for readpage
1639                  * to complete and drop the extent cache
1640                  */
1641                 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
                        /*
                         * Items are in key order, so cache one inode and
                         * advance it (dropping the old one via delayed iput)
                         * as the objectid grows.
                         */
1642                         if (first) {
1643                                 inode = find_next_inode(root, key.objectid);
1644                                 first = 0;
1645                         } else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
1646                                 btrfs_add_delayed_iput(inode);
1647                                 inode = find_next_inode(root, key.objectid);
1648                         }
1649                         if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
1650                                 end = key.offset +
1651                                       btrfs_file_extent_num_bytes(leaf, fi);
1652                                 WARN_ON(!IS_ALIGNED(key.offset,
1653                                                     fs_info->sectorsize));
1654                                 WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
1655                                 end--;
1656                                 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1657                                                       key.offset, end);
                                /* lock contended: skip this extent instead of
                                 * blocking while holding the leaf */
1658                                 if (!ret)
1659                                         continue;
1660
1661                                 btrfs_drop_extent_cache(BTRFS_I(inode),
1662                                                 key.offset,     end, 1);
1663                                 unlock_extent(&BTRFS_I(inode)->io_tree,
1664                                               key.offset, end);
1665                         }
1666                 }
1667
1668                 ret = get_new_location(rc->data_inode, &new_bytenr,
1669                                        bytenr, num_bytes);
1670                 if (ret) {
1671                         /*
1672                          * Don't have to abort since we've not changed anything
1673                          * in the file extent yet.
1674                          */
1675                         break;
1676                 }
1677
1678                 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1679                 dirty = 1;
1680
                /* the data backref records the logical file offset of the
                 * extent start, hence subtract the in-extent offset */
1681                 key.offset -= btrfs_file_extent_offset(leaf, fi);
1682                 ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
1683                                            num_bytes, parent,
1684                                            btrfs_header_owner(leaf),
1685                                            key.objectid, key.offset);
1686                 if (ret) {
1687                         btrfs_abort_transaction(trans, ret);
1688                         break;
1689                 }
1690
1691                 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1692                                         parent, btrfs_header_owner(leaf),
1693                                         key.objectid, key.offset);
1694                 if (ret) {
1695                         btrfs_abort_transaction(trans, ret);
1696                         break;
1697                 }
1698         }
1699         if (dirty)
1700                 btrfs_mark_buffer_dirty(leaf);
1701         if (inode)
1702                 btrfs_add_delayed_iput(inode);
1703         return ret;
1704 }
1705
1706 static noinline_for_stack
1707 int memcmp_node_keys(struct extent_buffer *eb, int slot,
1708                      struct btrfs_path *path, int level)
1709 {
1710         struct btrfs_disk_key key1;
1711         struct btrfs_disk_key key2;
1712         btrfs_node_key(eb, &key1, slot);
1713         btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1714         return memcmp(&key1, &key2, sizeof(key1));
1715 }
1716
1717 /*
1718  * try to replace tree blocks in fs tree with the new blocks
1719  * in reloc tree. tree blocks haven't been modified since the
1720  * reloc tree was create can be replaced.
1721  *
1722  * if a block was replaced, level of the block + 1 is returned.
1723  * if no block got replaced, 0 is returned. if there are other
1724  * errors, a negative error number is returned.
1725  */
/*
 * @dest is the fs tree, @src the reloc tree; @path is positioned in the
 * reloc tree.  The function descends @dest from its root looking for the
 * node pointer that matches the reloc-tree position, then swaps the block
 * pointers of the two trees and fixes up the four backrefs involved.
 *
 * The walk is done twice: a first read-only pass (cow == 0) to find the
 * target, then a repeat pass from the root ("goto again") with cow == 1
 * that COWs each node on the way down so the swap is made on writable
 * blocks.
 */
1726 static noinline_for_stack
1727 int replace_path(struct btrfs_trans_handle *trans,
1728                  struct btrfs_root *dest, struct btrfs_root *src,
1729                  struct btrfs_path *path, struct btrfs_key *next_key,
1730                  int lowest_level, int max_level)
1731 {
1732         struct btrfs_fs_info *fs_info = dest->fs_info;
1733         struct extent_buffer *eb;
1734         struct extent_buffer *parent;
1735         struct btrfs_key key;
1736         u64 old_bytenr;
1737         u64 new_bytenr;
1738         u64 old_ptr_gen;
1739         u64 new_ptr_gen;
1740         u64 last_snapshot;
1741         u32 blocksize;
1742         int cow = 0;
1743         int level;
1744         int ret;
1745         int slot;
1746
1747         BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1748         BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
1749
1750         last_snapshot = btrfs_root_last_snapshot(&src->root_item);
1751 again:
1752         slot = path->slots[lowest_level];
1753         btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1754
1755         eb = btrfs_lock_root_node(dest);
1756         btrfs_set_lock_blocking(eb);
1757         level = btrfs_header_level(eb);
1758
1759         if (level < lowest_level) {
1760                 btrfs_tree_unlock(eb);
1761                 free_extent_buffer(eb);
1762                 return 0;
1763         }
1764
1765         if (cow) {
1766                 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
1767                 BUG_ON(ret);
1768         }
1769         btrfs_set_lock_blocking(eb);
1770
                /* default: no next key known yet; filled in during descent */
1771         if (next_key) {
1772                 next_key->objectid = (u64)-1;
1773                 next_key->type = (u8)-1;
1774                 next_key->offset = (u64)-1;
1775         }
1776
1777         parent = eb;
1778         while (1) {
1779                 struct btrfs_key first_key;
1780
1781                 level = btrfs_header_level(parent);
1782                 BUG_ON(level < lowest_level);
1783
1784                 ret = btrfs_bin_search(parent, &key, level, &slot);
1785                 if (ret && slot > 0)
1786                         slot--;
1787
1788                 if (next_key && slot + 1 < btrfs_header_nritems(parent))
1789                         btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1790
1791                 old_bytenr = btrfs_node_blockptr(parent, slot);
1792                 blocksize = fs_info->nodesize;
1793                 old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1794                 btrfs_node_key_to_cpu(parent, &first_key, slot);
1795
1796                 if (level <= max_level) {
1797                         eb = path->nodes[level];
1798                         new_bytenr = btrfs_node_blockptr(eb,
1799                                                         path->slots[level]);
1800                         new_ptr_gen = btrfs_node_ptr_generation(eb,
1801                                                         path->slots[level]);
1802                 } else {
1803                         new_bytenr = 0;
1804                         new_ptr_gen = 0;
1805                 }
1806
                /* the two trees already point at the same block: nothing to
                 * swap, and swapping would corrupt backrefs */
1807                 if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
1808                         ret = level;
1809                         break;
1810                 }
1811
                /*
                 * Descend one level when the blocks cannot be swapped here:
                 * no reloc counterpart, the fs-tree block was modified after
                 * the reloc tree was created, or the keys diverge.
                 */
1812                 if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1813                     memcmp_node_keys(parent, slot, path, level)) {
1814                         if (level <= lowest_level) {
1815                                 ret = 0;
1816                                 break;
1817                         }
1818
1819                         eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen,
1820                                              level - 1, &first_key);
1821                         if (IS_ERR(eb)) {
1822                                 ret = PTR_ERR(eb);
1823                                 break;
1824                         } else if (!extent_buffer_uptodate(eb)) {
1825                                 ret = -EIO;
1826                                 free_extent_buffer(eb);
1827                                 break;
1828                         }
1829                         btrfs_tree_lock(eb);
1830                         if (cow) {
1831                                 ret = btrfs_cow_block(trans, dest, eb, parent,
1832                                                       slot, &eb);
1833                                 BUG_ON(ret);
1834                         }
1835                         btrfs_set_lock_blocking(eb);
1836
1837                         btrfs_tree_unlock(parent);
1838                         free_extent_buffer(parent);
1839
1840                         parent = eb;
1841                         continue;
1842                 }
1843
                /* found a swappable block on the read-only pass: restart
                 * from the root with COW enabled */
1844                 if (!cow) {
1845                         btrfs_tree_unlock(parent);
1846                         free_extent_buffer(parent);
1847                         cow = 1;
1848                         goto again;
1849                 }
1850
1851                 btrfs_node_key_to_cpu(path->nodes[level], &key,
1852                                       path->slots[level]);
1853                 btrfs_release_path(path);
1854
1855                 path->lowest_level = level;
1856                 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1857                 path->lowest_level = 0;
1858                 BUG_ON(ret);
1859
1860                 /*
1861                  * Info qgroup to trace both subtrees.
1862                  *
1863                  * We must trace both trees.
1864                  * 1) Tree reloc subtree
1865                  *    If not traced, we will leak data numbers
1866                  * 2) Fs subtree
1867                  *    If not traced, we will double count old data
1868                  *    and tree block numbers, if current trans doesn't free
1869                  *    data reloc tree inode.
1870                  */
1871                 ret = btrfs_qgroup_trace_subtree(trans, src, parent,
1872                                 btrfs_header_generation(parent),
1873                                 btrfs_header_level(parent));
1874                 if (ret < 0)
1875                         break;
1876                 ret = btrfs_qgroup_trace_subtree(trans, dest,
1877                                 path->nodes[level],
1878                                 btrfs_header_generation(path->nodes[level]),
1879                                 btrfs_header_level(path->nodes[level]));
1880                 if (ret < 0)
1881                         break;
1882
1883                 /*
1884                  * swap blocks in fs tree and reloc tree.
1885                  */
1886                 btrfs_set_node_blockptr(parent, slot, new_bytenr);
1887                 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1888                 btrfs_mark_buffer_dirty(parent);
1889
1890                 btrfs_set_node_blockptr(path->nodes[level],
1891                                         path->slots[level], old_bytenr);
1892                 btrfs_set_node_ptr_generation(path->nodes[level],
1893                                               path->slots[level], old_ptr_gen);
1894                 btrfs_mark_buffer_dirty(path->nodes[level]);
1895
                /* add refs for the blocks' new parents, then drop the refs
                 * held by their old parents */
1896                 ret = btrfs_inc_extent_ref(trans, src, old_bytenr,
1897                                         blocksize, path->nodes[level]->start,
1898                                         src->root_key.objectid, level - 1, 0);
1899                 BUG_ON(ret);
1900                 ret = btrfs_inc_extent_ref(trans, dest, new_bytenr,
1901                                         blocksize, 0, dest->root_key.objectid,
1902                                         level - 1, 0);
1903                 BUG_ON(ret);
1904
1905                 ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
1906                                         path->nodes[level]->start,
1907                                         src->root_key.objectid, level - 1, 0);
1908                 BUG_ON(ret);
1909
1910                 ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
1911                                         0, dest->root_key.objectid, level - 1,
1912                                         0);
1913                 BUG_ON(ret);
1914
1915                 btrfs_unlock_up_safe(path, 0);
1916
1917                 ret = level;
1918                 break;
1919         }
1920         btrfs_tree_unlock(parent);
1921         free_extent_buffer(parent);
1922         return ret;
1923 }
1924
1925 /*
1926  * helper to find next relocated block in reloc tree
1927  */
1928 static noinline_for_stack
1929 int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1930                        int *level)
1931 {
1932         struct extent_buffer *eb;
1933         int i;
1934         u64 last_snapshot;
1935         u32 nritems;
1936
1937         last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1938
1939         for (i = 0; i < *level; i++) {
1940                 free_extent_buffer(path->nodes[i]);
1941                 path->nodes[i] = NULL;
1942         }
1943
1944         for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
1945                 eb = path->nodes[i];
1946                 nritems = btrfs_header_nritems(eb);
1947                 while (path->slots[i] + 1 < nritems) {
1948                         path->slots[i]++;
1949                         if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
1950                             last_snapshot)
1951                                 continue;
1952
1953                         *level = i;
1954                         return 0;
1955                 }
1956                 free_extent_buffer(path->nodes[i]);
1957                 path->nodes[i] = NULL;
1958         }
1959         return 1;
1960 }
1961
1962 /*
1963  * walk down reloc tree to find relocated block of lowest level
1964  */
/*
 * Starting at *level, descend the reloc tree following only node pointers
 * whose generation is newer than the root's last snapshot (only those
 * subtrees contain relocated blocks).  On success returns 0 with *level
 * set to the level just above the deepest newer pointer found (stops at
 * level 1, i.e. leaves are never read here); returns 1 when nothing newer
 * remains, or a negative errno on read failure.
 */
1965 static noinline_for_stack
1966 int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1967                          int *level)
1968 {
1969         struct btrfs_fs_info *fs_info = root->fs_info;
1970         struct extent_buffer *eb = NULL;
1971         int i;
1972         u64 bytenr;
1973         u64 ptr_gen = 0;
1974         u64 last_snapshot;
1975         u32 nritems;
1976
1977         last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1978
1979         for (i = *level; i > 0; i--) {
1980                 struct btrfs_key first_key;
1981
1982                 eb = path->nodes[i];
1983                 nritems = btrfs_header_nritems(eb);
                /* skip slots whose pointers predate the last snapshot;
                 * ptr_gen keeps the generation of the slot we stop on and
                 * is reused for the read_tree_block() below */
1984                 while (path->slots[i] < nritems) {
1985                         ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
1986                         if (ptr_gen > last_snapshot)
1987                                 break;
1988                         path->slots[i]++;
1989                 }
1990                 if (path->slots[i] >= nritems) {
1991                         if (i == *level)
1992                                 break;
1993                         *level = i + 1;
1994                         return 0;
1995                 }
1996                 if (i == 1) {
1997                         *level = i;
1998                         return 0;
1999                 }
2000
2001                 bytenr = btrfs_node_blockptr(eb, path->slots[i]);
2002                 btrfs_node_key_to_cpu(eb, &first_key, path->slots[i]);
2003                 eb = read_tree_block(fs_info, bytenr, ptr_gen, i - 1,
2004                                      &first_key);
2005                 if (IS_ERR(eb)) {
2006                         return PTR_ERR(eb);
2007                 } else if (!extent_buffer_uptodate(eb)) {
2008                         free_extent_buffer(eb);
2009                         return -EIO;
2010                 }
2011                 BUG_ON(btrfs_header_level(eb) != i - 1);
2012                 path->nodes[i - 1] = eb;
2013                 path->slots[i - 1] = 0;
2014         }
2015         return 1;
2016 }
2017
2018 /*
2019  * invalidate extent cache for file extents whose key in range of
2020  * [min_key, max_key)
2021  */
/*
 * Iterates the in-memory inodes of @root in objectid order (via
 * find_next_inode) and, for each regular file overlapping the key range,
 * drops the cached extent mapping for the affected byte range so it is
 * re-read from the (now updated) file extent items.  Always returns 0.
 */
2022 static int invalidate_extent_cache(struct btrfs_root *root,
2023                                    struct btrfs_key *min_key,
2024                                    struct btrfs_key *max_key)
2025 {
2026         struct btrfs_fs_info *fs_info = root->fs_info;
2027         struct inode *inode = NULL;
2028         u64 objectid;
2029         u64 start, end;
2030         u64 ino;
2031
2032         objectid = min_key->objectid;
2033         while (1) {
2034                 cond_resched();
                /* release the inode held from the previous iteration;
                 * iput(NULL) is a no-op on the first pass */
2035                 iput(inode);
2036
2037                 if (objectid > max_key->objectid)
2038                         break;
2039
2040                 inode = find_next_inode(root, objectid);
2041                 if (!inode)
2042                         break;
2043                 ino = btrfs_ino(BTRFS_I(inode));
2044
2045                 if (ino > max_key->objectid) {
2046                         iput(inode);
2047                         break;
2048                 }
2049
2050                 objectid = ino + 1;
2051                 if (!S_ISREG(inode->i_mode))
2052                         continue;
2053
                /* clamp the start of the range for the first inode; any
                 * other inode is invalidated from file offset 0 */
2054                 if (unlikely(min_key->objectid == ino)) {
2055                         if (min_key->type > BTRFS_EXTENT_DATA_KEY)
2056                                 continue;
2057                         if (min_key->type < BTRFS_EXTENT_DATA_KEY)
2058                                 start = 0;
2059                         else {
2060                                 start = min_key->offset;
2061                                 WARN_ON(!IS_ALIGNED(start, fs_info->sectorsize));
2062                         }
2063                 } else {
2064                         start = 0;
2065                 }
2066
                /* clamp the end of the range for the last inode; any other
                 * inode is invalidated to EOF */
2067                 if (unlikely(max_key->objectid == ino)) {
2068                         if (max_key->type < BTRFS_EXTENT_DATA_KEY)
2069                                 continue;
2070                         if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
2071                                 end = (u64)-1;
2072                         } else {
2073                                 if (max_key->offset == 0)
2074                                         continue;
2075                                 end = max_key->offset;
2076                                 WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
2077                                 end--;
2078                         }
2079                 } else {
2080                         end = (u64)-1;
2081                 }
2082
2083                 /* the lock_extent waits for readpage to complete */
2084                 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2085                 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1);
2086                 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2087         }
2088         return 0;
2089 }
2090
2091 static int find_next_key(struct btrfs_path *path, int level,
2092                          struct btrfs_key *key)
2093
2094 {
2095         while (level < BTRFS_MAX_LEVEL) {
2096                 if (!path->nodes[level])
2097                         break;
2098                 if (path->slots[level] + 1 <
2099                     btrfs_header_nritems(path->nodes[level])) {
2100                         btrfs_node_key_to_cpu(path->nodes[level], key,
2101                                               path->slots[level] + 1);
2102                         return 0;
2103                 }
2104                 level++;
2105         }
2106         return 1;
2107 }
2108
2109 /*
2110  * merge the relocated tree blocks in reloc tree with corresponding
2111  * fs tree.
2112  */
/*
 * Walks the reloc tree of @root, swapping relocated blocks into the fs
 * tree via replace_path() one transaction at a time.  Progress is saved
 * in the reloc root's drop_progress/drop_level so the merge can resume
 * after a crash; on full success root_refs is set to 0 so the reloc tree
 * can be dropped.  Returns 0 on success, negative errno on failure.
 */
2113 static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
2114                                                struct btrfs_root *root)
2115 {
2116         struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2117         LIST_HEAD(inode_list);
2118         struct btrfs_key key;
2119         struct btrfs_key next_key;
2120         struct btrfs_trans_handle *trans = NULL;
2121         struct btrfs_root *reloc_root;
2122         struct btrfs_root_item *root_item;
2123         struct btrfs_path *path;
2124         struct extent_buffer *leaf;
2125         int level;
2126         int max_level;
2127         int replaced = 0;
2128         int ret;
2129         int err = 0;
2130         u32 min_reserved;
2131
2132         path = btrfs_alloc_path();
2133         if (!path)
2134                 return -ENOMEM;
2135         path->reada = READA_FORWARD;
2136
2137         reloc_root = root->reloc_root;
2138         root_item = &reloc_root->root_item;
2139
                /* drop_progress == 0 means a fresh merge: start from the
                 * reloc root node; otherwise resume where we left off */
2140         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2141                 level = btrfs_root_level(root_item);
2142                 extent_buffer_get(reloc_root->node);
2143                 path->nodes[level] = reloc_root->node;
2144                 path->slots[level] = 0;
2145         } else {
2146                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2147
2148                 level = root_item->drop_level;
2149                 BUG_ON(level == 0);
2150                 path->lowest_level = level;
2151                 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
2152                 path->lowest_level = 0;
2153                 if (ret < 0) {
2154                         btrfs_free_path(path);
2155                         return ret;
2156                 }
2157
2158                 btrfs_node_key_to_cpu(path->nodes[level], &next_key,
2159                                       path->slots[level]);
2160                 WARN_ON(memcmp(&key, &next_key, sizeof(key)));
2161
2162                 btrfs_unlock_up_safe(path, 0);
2163         }
2164
2165         min_reserved = fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2166         memset(&next_key, 0, sizeof(next_key));
2167
                /* one short transaction per iteration: find the next
                 * relocated block, swap it, record progress, commit */
2168         while (1) {
2169                 ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
2170                                              BTRFS_RESERVE_FLUSH_ALL);
2171                 if (ret) {
2172                         err = ret;
2173                         goto out;
2174                 }
2175                 trans = btrfs_start_transaction(root, 0);
2176                 if (IS_ERR(trans)) {
2177                         err = PTR_ERR(trans);
2178                         trans = NULL;
2179                         goto out;
2180                 }
2181                 trans->block_rsv = rc->block_rsv;
2182
2183                 replaced = 0;
2184                 max_level = level;
2185
2186                 ret = walk_down_reloc_tree(reloc_root, path, &level);
2187                 if (ret < 0) {
2188                         err = ret;
2189                         goto out;
2190                 }
2191                 if (ret > 0)
2192                         break;
2193
                /* skip replace_path() if the current position is already
                 * past the last key we replaced */
2194                 if (!find_next_key(path, level, &key) &&
2195                     btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
2196                         ret = 0;
2197                 } else {
2198                         ret = replace_path(trans, root, reloc_root, path,
2199                                            &next_key, level, max_level);
2200                 }
2201                 if (ret < 0) {
2202                         err = ret;
2203                         goto out;
2204                 }
2205
2206                 if (ret > 0) {
2207                         level = ret;
2208                         btrfs_node_key_to_cpu(path->nodes[level], &key,
2209                                               path->slots[level]);
2210                         replaced = 1;
2211                 }
2212
2213                 ret = walk_up_reloc_tree(reloc_root, path, &level);
2214                 if (ret > 0)
2215                         break;
2216
2217                 BUG_ON(level == 0);
2218                 /*
2219                  * save the merging progress in the drop_progress.
2220                  * this is OK since root refs == 1 in this case.
2221                  */
2222                 btrfs_node_key(path->nodes[level], &root_item->drop_progress,
2223                                path->slots[level]);
2224                 root_item->drop_level = level;
2225
2226                 btrfs_end_transaction_throttle(trans);
2227                 trans = NULL;
2228
2229                 btrfs_btree_balance_dirty(fs_info);
2230
2231                 if (replaced && rc->stage == UPDATE_DATA_PTRS)
2232                         invalidate_extent_cache(root, &key, &next_key);
2233         }
2234
2235         /*
2236          * handle the case only one block in the fs tree need to be
2237          * relocated and the block is tree root.
2238          */
                /* NOTE: only the loop's break paths reach here, so trans is
                 * non-NULL at this point */
2239         leaf = btrfs_lock_root_node(root);
2240         ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
2241         btrfs_tree_unlock(leaf);
2242         free_extent_buffer(leaf);
2243         if (ret < 0)
2244                 err = ret;
2245 out:
2246         btrfs_free_path(path);
2247
2248         if (err == 0) {
2249                 memset(&root_item->drop_progress, 0,
2250                        sizeof(root_item->drop_progress));
2251                 root_item->drop_level = 0;
                /* refs == 0 marks the reloc tree as fully merged and ready
                 * to be dropped */
2252                 btrfs_set_root_refs(root_item, 0);
2253                 btrfs_update_reloc_root(trans, root);
2254         }
2255
2256         if (trans)
2257                 btrfs_end_transaction_throttle(trans);
2258
2259         btrfs_btree_balance_dirty(fs_info);
2260
2261         if (replaced && rc->stage == UPDATE_DATA_PTRS)
2262                 invalidate_extent_cache(root, &key, &next_key);
2263
2264         return err;
2265 }
2266
/*
 * Prepare all reloc trees for merging: reserve metadata space for the
 * merge, then commit the reloc roots with root_refs set to 1 so that
 * btrfs_recover_relocation() knows to resume the merge after a crash.
 *
 * @err carries an earlier error into this function; if set (or if the
 * reservation fails), the reloc roots are still updated but no space is
 * reserved and the transaction is not committed.  Returns @err or a new
 * error from joining the transaction.
 */
2267 static noinline_for_stack
2268 int prepare_to_merge(struct reloc_control *rc, int err)
2269 {
2270         struct btrfs_root *root = rc->extent_root;
2271         struct btrfs_fs_info *fs_info = root->fs_info;
2272         struct btrfs_root *reloc_root;
2273         struct btrfs_trans_handle *trans;
2274         LIST_HEAD(reloc_roots);
2275         u64 num_bytes = 0;
2276         int ret;
2277
                /* reloc_mutex serializes merging_rsv_size updates with
                 * readers elsewhere */
2278         mutex_lock(&fs_info->reloc_mutex);
2279         rc->merging_rsv_size += fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2280         rc->merging_rsv_size += rc->nodes_relocated * 2;
2281         mutex_unlock(&fs_info->reloc_mutex);
2282
2283 again:
2284         if (!err) {
2285                 num_bytes = rc->merging_rsv_size;
2286                 ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
2287                                           BTRFS_RESERVE_FLUSH_ALL);
2288                 if (ret)
2289                         err = ret;
2290         }
2291
2292         trans = btrfs_join_transaction(rc->extent_root);
2293         if (IS_ERR(trans)) {
2294                 if (!err)
2295                         btrfs_block_rsv_release(fs_info, rc->block_rsv,
2296                                                 num_bytes);
2297                 return PTR_ERR(trans);
2298         }
2299
                /* merging_rsv_size grew while we reserved: release and
                 * retry with the larger amount */
2300         if (!err) {
2301                 if (num_bytes != rc->merging_rsv_size) {
2302                         btrfs_end_transaction(trans);
2303                         btrfs_block_rsv_release(fs_info, rc->block_rsv,
2304                                                 num_bytes);
2305                         goto again;
2306                 }
2307         }
2308
2309         rc->merge_reloc_tree = 1;
2310
2311         while (!list_empty(&rc->reloc_roots)) {
2312                 reloc_root = list_entry(rc->reloc_roots.next,
2313                                         struct btrfs_root, root_list);
2314                 list_del_init(&reloc_root->root_list);
2315
2316                 root = read_fs_root(fs_info, reloc_root->root_key.offset);
2317                 BUG_ON(IS_ERR(root));
2318                 BUG_ON(root->reloc_root != reloc_root);
2319
2320                 /*
2321                  * set reference count to 1, so btrfs_recover_relocation
2322                  * knows it should resumes merging
2323                  */
2324                 if (!err)
2325                         btrfs_set_root_refs(&reloc_root->root_item, 1);
2326                 btrfs_update_reloc_root(trans, root);
2327
2328                 list_add(&reloc_root->root_list, &reloc_roots);
2329         }
2330
                /* put the processed roots back on rc->reloc_roots */
2331         list_splice(&reloc_roots, &rc->reloc_roots);
2332
2333         if (!err)
2334                 btrfs_commit_transaction(trans);
2335         else
2336                 btrfs_end_transaction(trans);
2337         return err;
2338 }
2339
2340 static noinline_for_stack
2341 void free_reloc_roots(struct list_head *list)
2342 {
2343         struct btrfs_root *reloc_root;
2344
2345         while (!list_empty(list)) {
2346                 reloc_root = list_entry(list->next, struct btrfs_root,
2347                                         root_list);
2348                 __del_reloc_root(reloc_root);
2349                 free_extent_buffer(reloc_root->node);
2350                 free_extent_buffer(reloc_root->commit_root);
2351                 reloc_root->node = NULL;
2352                 reloc_root->commit_root = NULL;
2353         }
2354 }
2355
/*
 * Merge every reloc tree tracked by @rc into its fs tree and drop it.
 * Roots with root_refs > 0 still need merging (merge_reloc_root); roots
 * with refs == 0 were already merged (e.g. before a crash) and are only
 * dropped.  On error the remaining reloc roots are torn down and the
 * error is recorded via btrfs_handle_fs_error().
 */
2356 static noinline_for_stack
2357 void merge_reloc_roots(struct reloc_control *rc)
2358 {
2359         struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2360         struct btrfs_root *root;
2361         struct btrfs_root *reloc_root;
2362         LIST_HEAD(reloc_roots);
2363         int found = 0;
2364         int ret = 0;
2365 again:
2366         root = rc->extent_root;
2367
2368         /*
2369          * this serializes us with btrfs_record_root_in_transaction,
2370          * we have to make sure nobody is in the middle of
2371          * adding their roots to the list while we are
2372          * doing this splice
2373          */
2374         mutex_lock(&fs_info->reloc_mutex);
2375         list_splice_init(&rc->reloc_roots, &reloc_roots);
2376         mutex_unlock(&fs_info->reloc_mutex);
2377
2378         while (!list_empty(&reloc_roots)) {
2379                 found = 1;
2380                 reloc_root = list_entry(reloc_roots.next,
2381                                         struct btrfs_root, root_list);
2382
2383                 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
2384                         root = read_fs_root(fs_info,
2385                                             reloc_root->root_key.offset);
2386                         BUG_ON(IS_ERR(root));
2387                         BUG_ON(root->reloc_root != reloc_root);
2388
2389                         ret = merge_reloc_root(rc, root);
2390                         if (ret) {
                                /* requeue so the error path below can see
                                 * and clean up this root */
2391                                 if (list_empty(&reloc_root->root_list))
2392                                         list_add_tail(&reloc_root->root_list,
2393                                                       &reloc_roots);
2394                                 goto out;
2395                         }
2396                 } else {
2397                         list_del_init(&reloc_root->root_list);
2398                 }
2399
2400                 ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
2401                 if (ret < 0) {
2402                         if (list_empty(&reloc_root->root_list))
2403                                 list_add_tail(&reloc_root->root_list,
2404                                               &reloc_roots);
2405                         goto out;
2406                 }
2407         }
2408
                /* more reloc roots may have been added while merging;
                 * rescan until the list stays empty */
2409         if (found) {
2410                 found = 0;
2411                 goto again;
2412         }
2413 out:
2414         if (ret) {
2415                 btrfs_handle_fs_error(fs_info, ret, NULL);
2416                 if (!list_empty(&reloc_roots))
2417                         free_reloc_roots(&reloc_roots);
2418
2419                 /* new reloc root may be added */
2420                 mutex_lock(&fs_info->reloc_mutex);
2421                 list_splice_init(&rc->reloc_roots, &reloc_roots);
2422                 mutex_unlock(&fs_info->reloc_mutex);
2423                 if (!list_empty(&reloc_roots))
2424                         free_reloc_roots(&reloc_roots);
2425         }
2426
2427         BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
2428 }
2429
2430 static void free_block_list(struct rb_root *blocks)
2431 {
2432         struct tree_block *block;
2433         struct rb_node *rb_node;
2434         while ((rb_node = rb_first(blocks))) {
2435                 block = rb_entry(rb_node, struct tree_block, rb_node);
2436                 rb_erase(rb_node, blocks);
2437                 kfree(block);
2438         }
2439 }
2440
2441 static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
2442                                       struct btrfs_root *reloc_root)
2443 {
2444         struct btrfs_fs_info *fs_info = reloc_root->fs_info;
2445         struct btrfs_root *root;
2446
2447         if (reloc_root->last_trans == trans->transid)
2448                 return 0;
2449
2450         root = read_fs_root(fs_info, reloc_root->root_key.offset);
2451         BUG_ON(IS_ERR(root));
2452         BUG_ON(root->reloc_root != reloc_root);
2453
2454         return btrfs_record_root_in_trans(trans, root);
2455 }
2456
/*
 * Select the reloc tree root to use for COWing @node: walk up the
 * backref cache, record the owning root in the current transaction and
 * return its reloc root.  Also records the walked path in
 * rc->backref_cache.path for btrfs_reloc_cow_block.  Returns NULL when
 * no usable root is found on the walk.
 */
static noinline_for_stack
struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
				     struct reloc_control *rc,
				     struct backref_node *node,
				     struct backref_edge *edges[])
{
	struct backref_node *next;
	struct btrfs_root *root;
	int index = 0;

	next = node;
	while (1) {
		cond_resched();
		next = walk_up_backref(next, edges, &index);
		root = next->root;
		BUG_ON(!root);
		BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state));

		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
			/* already a reloc tree root, just record it */
			record_reloc_root_in_trans(trans, root);
			break;
		}

		btrfs_record_root_in_trans(trans, root);
		root = root->reloc_root;

		if (next->new_bytenr != root->node->start) {
			/*
			 * First time we reach this root block on the walk:
			 * switch the cached node over to the reloc tree and
			 * mark it processed.
			 */
			BUG_ON(next->new_bytenr);
			BUG_ON(!list_empty(&next->list));
			next->new_bytenr = root->node->start;
			next->root = root;
			list_add_tail(&next->list,
				      &rc->backref_cache.changed);
			__mark_block_processed(rc, next);
			break;
		}

		/* unexpected: reloc root already matches, retry downward */
		WARN_ON(1);
		root = NULL;
		next = walk_down_backref(edges, &index);
		if (!next || next->level <= node->level)
			break;
	}
	if (!root)
		return NULL;

	next = node;
	/* setup backref node path for btrfs_reloc_cow_block */
	while (1) {
		rc->backref_cache.path[next->level] = next;
		if (--index < 0)
			break;
		next = edges[index]->node[UPPER];
	}
	return root;
}
2513
2514 /*
2515  * select a tree root for relocation. return NULL if the block
2516  * is reference counted. we should use do_relocation() in this
2517  * case. return a tree root pointer if the block isn't reference
2518  * counted. return -ENOENT if the block is root of reloc tree.
2519  */
2520 static noinline_for_stack
2521 struct btrfs_root *select_one_root(struct backref_node *node)
2522 {
2523         struct backref_node *next;
2524         struct btrfs_root *root;
2525         struct btrfs_root *fs_root = NULL;
2526         struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2527         int index = 0;
2528
2529         next = node;
2530         while (1) {
2531                 cond_resched();
2532                 next = walk_up_backref(next, edges, &index);
2533                 root = next->root;
2534                 BUG_ON(!root);
2535
2536                 /* no other choice for non-references counted tree */
2537                 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
2538                         return root;
2539
2540                 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
2541                         fs_root = root;
2542
2543                 if (next != node)
2544                         return NULL;
2545
2546                 next = walk_down_backref(edges, &index);
2547                 if (!next || next->level <= node->level)
2548                         break;
2549         }
2550
2551         if (!fs_root)
2552                 return ERR_PTR(-ENOENT);
2553         return fs_root;
2554 }
2555
/*
 * Estimate the metadata space relocating @node may touch: walk up the
 * backref cache from @node, adding one nodesize for every block visited.
 * With @reserve set, any already-processed block stops the upward walk
 * (used when reserving space before relocation); without it only @node
 * itself is exempt from the processed check.
 */
static noinline_for_stack
u64 calcu_metadata_size(struct reloc_control *rc,
			struct backref_node *node, int reserve)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct backref_node *next = node;
	struct backref_edge *edge;
	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	u64 num_bytes = 0;
	int index = 0;

	/* reserving for a block that was already processed makes no sense */
	BUG_ON(reserve && node->processed);

	while (next) {
		cond_resched();
		while (1) {
			if (next->processed && (reserve || next != node))
				break;

			num_bytes += fs_info->nodesize;

			if (list_empty(&next->upper))
				break;

			/* climb to the first parent; siblings come later */
			edge = list_entry(next->upper.next,
					  struct backref_edge, list[LOWER]);
			edges[index++] = edge;
			next = edge->node[UPPER];
		}
		/* backtrack to the next unvisited sibling branch */
		next = walk_down_backref(edges, &index);
	}
	return num_bytes;
}
2589
/*
 * Reserve metadata space before relocating @node.  Returns 0 on success
 * or -EAGAIN when the reservation could not be filled under the current
 * transaction; in that case rc->block_rsv->size is grown so the retry
 * (after the caller drops the transaction) has enough room.
 */
static int reserve_metadata_space(struct btrfs_trans_handle *trans,
				  struct reloc_control *rc,
				  struct backref_node *node)
{
	struct btrfs_root *root = rc->extent_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 num_bytes;
	int ret;
	u64 tmp;

	/*
	 * Twice the size computed over the backref path — presumably to
	 * cover both the COWed blocks and their parents; TODO confirm.
	 */
	num_bytes = calcu_metadata_size(rc, node, 1) * 2;

	trans->block_rsv = rc->block_rsv;
	rc->reserved_bytes += num_bytes;

	/*
	 * We are under a transaction here so we can only do limited flushing.
	 * If we get an enospc just kick back -EAGAIN so we know to drop the
	 * transaction and try to refill when we can flush all the things.
	 */
	ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
				BTRFS_RESERVE_FLUSH_LIMIT);
	if (ret) {
		/* double tmp until it exceeds what we reserved so far */
		tmp = fs_info->nodesize * RELOCATION_RESERVED_NODES;
		while (tmp <= rc->reserved_bytes)
			tmp <<= 1;
		/*
		 * only one thread can access block_rsv at this point,
		 * so we don't need hold lock to protect block_rsv.
		 * we expand more reservation size here to allow enough
		 * space for relocation and we will return earlier in
		 * enospc case.
		 */
		rc->block_rsv->size = tmp + fs_info->nodesize *
				      RELOCATION_RESERVED_NODES;
		return -EAGAIN;
	}

	return 0;
}
2630
2631 /*
2632  * relocate a block tree, and then update pointers in upper level
2633  * blocks that reference the block to point to the new location.
2634  *
2635  * if called by link_to_upper, the block has already been relocated.
2636  * in that case this function just updates pointers.
2637  */
static int do_relocation(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc,
			 struct backref_node *node,
			 struct btrfs_key *key,
			 struct btrfs_path *path, int lowest)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct backref_node *upper;
	struct backref_edge *edge;
	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	struct btrfs_root *root;
	struct extent_buffer *eb;
	u32 blocksize;
	u64 bytenr;
	u64 generation;
	int slot;
	int ret;
	int err = 0;

	/* the lowest block must not have been COWed (cached) yet */
	BUG_ON(lowest && node->eb);

	path->lowest_level = node->level + 1;
	rc->backref_cache.path[node->level] = node;
	/* visit every parent block that references this node */
	list_for_each_entry(edge, &node->upper, list[LOWER]) {
		struct btrfs_key first_key;

		cond_resched();

		upper = edge->node[UPPER];
		root = select_reloc_root(trans, rc, upper, edges);
		BUG_ON(!root);

		if (upper->eb && !upper->locked) {
			if (!lowest) {
				/* skip parents that already point at us */
				ret = btrfs_bin_search(upper->eb, key,
						       upper->level, &slot);
				BUG_ON(ret);
				bytenr = btrfs_node_blockptr(upper->eb, slot);
				if (node->eb->start == bytenr)
					goto next;
			}
			/* cached but unlocked: re-read under lock below */
			drop_node_buffer(upper);
		}

		if (!upper->eb) {
			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
			if (ret) {
				if (ret < 0)
					err = ret;
				else
					err = -ENOENT;

				btrfs_release_path(path);
				break;
			}

			if (!upper->eb) {
				/* take ownership of the locked buffer */
				upper->eb = path->nodes[upper->level];
				path->nodes[upper->level] = NULL;
			} else {
				BUG_ON(upper->eb != path->nodes[upper->level]);
			}

			/* keep the lock; don't let release_path drop it */
			upper->locked = 1;
			path->locks[upper->level] = 0;

			slot = path->slots[upper->level];
			btrfs_release_path(path);
		} else {
			ret = btrfs_bin_search(upper->eb, key, upper->level,
					       &slot);
			BUG_ON(ret);
		}

		bytenr = btrfs_node_blockptr(upper->eb, slot);
		if (lowest) {
			/* the parent must still point at the old location */
			if (bytenr != node->bytenr) {
				btrfs_err(root->fs_info,
		"lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
					  bytenr, node->bytenr, slot,
					  upper->eb->start);
				err = -EIO;
				goto next;
			}
		} else {
			/* parent already updated, nothing to do here */
			if (node->eb->start == bytenr)
				goto next;
		}

		blocksize = root->fs_info->nodesize;
		generation = btrfs_node_ptr_generation(upper->eb, slot);
		btrfs_node_key_to_cpu(upper->eb, &first_key, slot);
		eb = read_tree_block(fs_info, bytenr, generation,
				     upper->level - 1, &first_key);
		if (IS_ERR(eb)) {
			err = PTR_ERR(eb);
			goto next;
		} else if (!extent_buffer_uptodate(eb)) {
			free_extent_buffer(eb);
			err = -EIO;
			goto next;
		}
		btrfs_tree_lock(eb);
		btrfs_set_lock_blocking(eb);

		if (!node->eb) {
			/*
			 * Relocate by COWing the child; node->eb is expected
			 * to be filled in during the COW (via the backref
			 * path recorded for btrfs_reloc_cow_block — TODO
			 * confirm against btrfs_cow_block callees).
			 */
			ret = btrfs_cow_block(trans, root, eb, upper->eb,
					      slot, &eb);
			btrfs_tree_unlock(eb);
			free_extent_buffer(eb);
			if (ret < 0) {
				err = ret;
				goto next;
			}
			BUG_ON(node->eb != eb);
		} else {
			/* already relocated: just repoint the parent slot */
			btrfs_set_node_blockptr(upper->eb, slot,
						node->eb->start);
			btrfs_set_node_ptr_generation(upper->eb, slot,
						      trans->transid);
			btrfs_mark_buffer_dirty(upper->eb);

			/* add a ref from the new parent ... */
			ret = btrfs_inc_extent_ref(trans, root,
						node->eb->start, blocksize,
						upper->eb->start,
						btrfs_header_owner(upper->eb),
						node->level, 0);
			BUG_ON(ret);

			/* ... and drop the subtree at the old location */
			ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
			BUG_ON(ret);
		}
next:
		if (!upper->pending)
			drop_node_buffer(upper);
		else
			unlock_node_buffer(upper);
		if (err)
			break;
	}

	if (!err && node->pending) {
		drop_node_buffer(node);
		list_move_tail(&node->list, &rc->backref_cache.changed);
		node->pending = 0;
	}

	path->lowest_level = 0;
	/* -ENOSPC must have been handled via reserve_metadata_space */
	BUG_ON(err == -ENOSPC);
	return err;
}
2789
2790 static int link_to_upper(struct btrfs_trans_handle *trans,
2791                          struct reloc_control *rc,
2792                          struct backref_node *node,
2793                          struct btrfs_path *path)
2794 {
2795         struct btrfs_key key;
2796
2797         btrfs_node_key_to_cpu(node->eb, &key, 0);
2798         return do_relocation(trans, rc, node, &key, path, 0);
2799 }
2800
2801 static int finish_pending_nodes(struct btrfs_trans_handle *trans,
2802                                 struct reloc_control *rc,
2803                                 struct btrfs_path *path, int err)
2804 {
2805         LIST_HEAD(list);
2806         struct backref_cache *cache = &rc->backref_cache;
2807         struct backref_node *node;
2808         int level;
2809         int ret;
2810
2811         for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2812                 while (!list_empty(&cache->pending[level])) {
2813                         node = list_entry(cache->pending[level].next,
2814                                           struct backref_node, list);
2815                         list_move_tail(&node->list, &list);
2816                         BUG_ON(!node->pending);
2817
2818                         if (!err) {
2819                                 ret = link_to_upper(trans, rc, node, path);
2820                                 if (ret < 0)
2821                                         err = ret;
2822                         }
2823                 }
2824                 list_splice_init(&list, &cache->pending[level]);
2825         }
2826         return err;
2827 }
2828
2829 static void mark_block_processed(struct reloc_control *rc,
2830                                  u64 bytenr, u32 blocksize)
2831 {
2832         set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1,
2833                         EXTENT_DIRTY);
2834 }
2835
2836 static void __mark_block_processed(struct reloc_control *rc,
2837                                    struct backref_node *node)
2838 {
2839         u32 blocksize;
2840         if (node->level == 0 ||
2841             in_block_group(node->bytenr, rc->block_group)) {
2842                 blocksize = rc->extent_root->fs_info->nodesize;
2843                 mark_block_processed(rc, node->bytenr, blocksize);
2844         }
2845         node->processed = 1;
2846 }
2847
2848 /*
2849  * mark a block and all blocks directly/indirectly reference the block
2850  * as processed.
2851  */
static void update_processed_blocks(struct reloc_control *rc,
				    struct backref_node *node)
{
	struct backref_node *next = node;
	struct backref_edge *edge;
	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	int index = 0;

	/* walk every path upward from @node, marking blocks processed */
	while (next) {
		cond_resched();
		while (1) {
			/* already-processed blocks terminate this branch */
			if (next->processed)
				break;

			__mark_block_processed(rc, next);

			if (list_empty(&next->upper))
				break;

			/* climb to the first parent; siblings come later */
			edge = list_entry(next->upper.next,
					  struct backref_edge, list[LOWER]);
			edges[index++] = edge;
			next = edge->node[UPPER];
		}
		/* backtrack to the next unvisited sibling branch */
		next = walk_down_backref(edges, &index);
	}
}
2879
2880 static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
2881 {
2882         u32 blocksize = rc->extent_root->fs_info->nodesize;
2883
2884         if (test_range_bit(&rc->processed_blocks, bytenr,
2885                            bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2886                 return 1;
2887         return 0;
2888 }
2889
2890 static int get_tree_block_key(struct btrfs_fs_info *fs_info,
2891                               struct tree_block *block)
2892 {
2893         struct extent_buffer *eb;
2894
2895         BUG_ON(block->key_ready);
2896         eb = read_tree_block(fs_info, block->bytenr, block->key.offset,
2897                              block->level, NULL);
2898         if (IS_ERR(eb)) {
2899                 return PTR_ERR(eb);
2900         } else if (!extent_buffer_uptodate(eb)) {
2901                 free_extent_buffer(eb);
2902                 return -EIO;
2903         }
2904         WARN_ON(btrfs_header_level(eb) != block->level);
2905         if (block->level == 0)
2906                 btrfs_item_key_to_cpu(eb, &block->key, 0);
2907         else
2908                 btrfs_node_key_to_cpu(eb, &block->key, 0);
2909         free_extent_buffer(eb);
2910         block->key_ready = 1;
2911         return 0;
2912 }
2913
2914 /*
2915  * helper function to relocate a tree block
2916  */
static int relocate_tree_block(struct btrfs_trans_handle *trans,
				struct reloc_control *rc,
				struct backref_node *node,
				struct btrfs_key *key,
				struct btrfs_path *path)
{
	struct btrfs_root *root;
	int ret = 0;

	if (!node)
		return 0;

	BUG_ON(node->processed);
	root = select_one_root(node);
	if (root == ERR_PTR(-ENOENT)) {
		/* block is the root of a reloc tree, nothing to relocate */
		update_processed_blocks(rc, node);
		goto out;
	}

	if (!root || test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
		/* may return -EAGAIN; caller then retries the block */
		ret = reserve_metadata_space(trans, rc, node);
		if (ret)
			goto out;
	}

	if (root) {
		if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
			/*
			 * Reference counted tree: record the root in the
			 * transaction and switch this node over to the
			 * root's reloc tree.
			 */
			BUG_ON(node->new_bytenr);
			BUG_ON(!list_empty(&node->list));
			btrfs_record_root_in_trans(trans, root);
			root = root->reloc_root;
			node->new_bytenr = root->node->start;
			node->root = root;
			list_add_tail(&node->list, &rc->backref_cache.changed);
		} else {
			/*
			 * Non reference counted tree: COW the block in
			 * place via a write-locked search down to it.
			 */
			path->lowest_level = node->level;
			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
			btrfs_release_path(path);
			if (ret > 0)
				ret = 0;
		}
		if (!ret)
			update_processed_blocks(rc, node);
	} else {
		/* shared block: relocate it and fix up all parents */
		ret = do_relocation(trans, rc, node, key, path, 1);
	}
out:
	if (ret || node->level == 0 || node->cowonly)
		remove_backref_node(&rc->backref_cache, node);
	return ret;
}
2968
2969 /*
2970  * relocate a list of blocks
2971  */
static noinline_for_stack
int relocate_tree_blocks(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc, struct rb_root *blocks)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct backref_node *node;
	struct btrfs_path *path;
	struct tree_block *block;
	struct rb_node *rb_node;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out_free_blocks;
	}

	/* pass 1: kick off readahead for blocks whose key we still need */
	rb_node = rb_first(blocks);
	while (rb_node) {
		block = rb_entry(rb_node, struct tree_block, rb_node);
		if (!block->key_ready)
			readahead_tree_block(fs_info, block->bytenr);
		rb_node = rb_next(rb_node);
	}

	/* pass 2: read the first key of every block */
	rb_node = rb_first(blocks);
	while (rb_node) {
		block = rb_entry(rb_node, struct tree_block, rb_node);
		if (!block->key_ready) {
			err = get_tree_block_key(fs_info, block);
			if (err)
				goto out_free_path;
		}
		rb_node = rb_next(rb_node);
	}

	/* pass 3: build backref trees and relocate each block */
	rb_node = rb_first(blocks);
	while (rb_node) {
		block = rb_entry(rb_node, struct tree_block, rb_node);

		node = build_backref_tree(rc, &block->key,
					  block->level, block->bytenr);
		if (IS_ERR(node)) {
			err = PTR_ERR(node);
			goto out;
		}

		ret = relocate_tree_block(trans, rc, node, &block->key,
					  path);
		if (ret < 0) {
			/*
			 * -EAGAIN on a later block means some were done;
			 * only surface it for the very first block.
			 */
			if (ret != -EAGAIN || rb_node == rb_first(blocks))
				err = ret;
			goto out;
		}
		rb_node = rb_next(rb_node);
	}
out:
	err = finish_pending_nodes(trans, rc, path, err);

out_free_path:
	btrfs_free_path(path);
out_free_blocks:
	free_block_list(blocks);
	return err;
}
3038
/*
 * Reserve data space and preallocate file extents covering every range
 * of @cluster in the relocation inode.  The inode's index_cnt is used
 * as the offset between cluster disk bytenrs and file offsets
 * (presumably the cluster base — confirm against the caller).
 */
static noinline_for_stack
int prealloc_file_extent_cluster(struct inode *inode,
				 struct file_extent_cluster *cluster)
{
	u64 alloc_hint = 0;
	u64 start;
	u64 end;
	u64 offset = BTRFS_I(inode)->index_cnt;
	u64 num_bytes;
	int nr = 0;
	int ret = 0;
	u64 prealloc_start = cluster->start - offset;
	u64 prealloc_end = cluster->end - offset;
	u64 cur_offset;
	struct extent_changeset *data_reserved = NULL;

	BUG_ON(cluster->start != cluster->boundary[0]);
	inode_lock(inode);

	/* reserve data space for the whole cluster up front */
	ret = btrfs_check_data_free_space(inode, &data_reserved, prealloc_start,
					  prealloc_end + 1 - prealloc_start);
	if (ret)
		goto out;

	cur_offset = prealloc_start;
	while (nr < cluster->nr) {
		/* each boundary starts a separate extent to preallocate */
		start = cluster->boundary[nr] - offset;
		if (nr + 1 < cluster->nr)
			end = cluster->boundary[nr + 1] - 1 - offset;
		else
			end = cluster->end - offset;

		lock_extent(&BTRFS_I(inode)->io_tree, start, end);
		num_bytes = end + 1 - start;
		/* give back reservation for the gap before this extent */
		if (cur_offset < start)
			btrfs_free_reserved_data_space(inode, data_reserved,
					cur_offset, start - cur_offset);
		ret = btrfs_prealloc_file_range(inode, 0, start,
						num_bytes, num_bytes,
						end + 1, &alloc_hint);
		cur_offset = end + 1;
		unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
		if (ret)
			break;
		nr++;
	}
	/* return whatever reservation was not consumed */
	if (cur_offset < prealloc_end)
		btrfs_free_reserved_data_space(inode, data_reserved,
				cur_offset, prealloc_end + 1 - cur_offset);
out:
	inode_unlock(inode);
	extent_changeset_free(data_reserved);
	return ret;
}
3093
3094 static noinline_for_stack
3095 int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
3096                          u64 block_start)
3097 {
3098         struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3099         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
3100         struct extent_map *em;
3101         int ret = 0;
3102
3103         em = alloc_extent_map();
3104         if (!em)
3105                 return -ENOMEM;
3106
3107         em->start = start;
3108         em->len = end + 1 - start;
3109         em->block_len = em->len;
3110         em->block_start = block_start;
3111         em->bdev = fs_info->fs_devices->latest_bdev;
3112         set_bit(EXTENT_FLAG_PINNED, &em->flags);
3113
3114         lock_extent(&BTRFS_I(inode)->io_tree, start, end);
3115         while (1) {
3116                 write_lock(&em_tree->lock);
3117                 ret = add_extent_mapping(em_tree, em, 0);
3118                 write_unlock(&em_tree->lock);
3119                 if (ret != -EEXIST) {
3120                         free_extent_map(em);
3121                         break;
3122                 }
3123                 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
3124         }
3125         unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
3126         return ret;
3127 }
3128
3129 static int relocate_file_extent_cluster(struct inode *inode,
3130                                         struct file_extent_cluster *cluster)
3131 {
3132         struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3133         u64 page_start;
3134         u64 page_end;
3135         u64 offset = BTRFS_I(inode)->index_cnt;
3136         unsigned long index;
3137         unsigned long last_index;
3138         struct page *page;
3139         struct file_ra_state *ra;
3140         gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
3141         int nr = 0;
3142         int ret = 0;
3143
3144         if (!cluster->nr)
3145                 return 0;
3146
3147         ra = kzalloc(sizeof(*ra), GFP_NOFS);
3148         if (!ra)
3149                 return -ENOMEM;
3150
3151         ret = prealloc_file_extent_cluster(inode, cluster);
3152         if (ret)
3153                 goto out;
3154
3155         file_ra_state_init(ra, inode->i_mapping);
3156
3157         ret = setup_extent_mapping(inode, cluster->start - offset,
3158                                    cluster->end - offset, cluster->start);
3159         if (ret)
3160                 goto out;
3161
3162         index = (cluster->start - offset) >> PAGE_SHIFT;
3163         last_index = (cluster->end - offset) >> PAGE_SHIFT;
3164         while (index <= last_index) {
3165                 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
3166                                 PAGE_SIZE);
3167                 if (ret)
3168                         goto out;
3169
3170                 page = find_lock_page(inode->i_mapping, index);
3171                 if (!page) {
3172                         page_cache_sync_readahead(inode->i_mapping,
3173                                                   ra, NULL, index,
3174                                                   last_index + 1 - index);
3175                         page = find_or_create_page(inode->i_mapping, index,
3176                                                    mask);
3177                         if (!page) {
3178                                 btrfs_delalloc_release_metadata(BTRFS_I(inode),
3179                                                         PAGE_SIZE, true);
3180                                 ret = -ENOMEM;
3181                                 goto out;
3182                         }
3183                 }
3184
3185                 if (PageReadahead(page)) {
3186                         page_cache_async_readahead(inode->i_mapping,
3187                                                    ra, NULL, page, index,
3188                                                    last_index + 1 - index);
3189                 }
3190
3191                 if (!PageUptodate(page)) {
3192                         btrfs_readpage(NULL, page);
3193                         lock_page(page);
3194                         if (!PageUptodate(page)) {
3195                                 unlock_page(page);
3196                                 put_page(page);
3197                                 btrfs_delalloc_release_metadata(BTRFS_I(inode),
3198                                                         PAGE_SIZE, true);
3199                                 btrfs_delalloc_release_extents(BTRFS_I(inode),
3200                                                                PAGE_SIZE, true);
3201                                 ret = -EIO;
3202                                 goto out;
3203                         }
3204                 }
3205
3206                 page_start = page_offset(page);
3207                 page_end = page_start + PAGE_SIZE - 1;
3208
3209                 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
3210
3211                 set_page_extent_mapped(page);
3212
3213                 if (nr < cluster->nr &&
3214                     page_start + offset == cluster->boundary[nr]) {
3215                         set_extent_bits(&BTRFS_I(inode)->io_tree,
3216                                         page_start, page_end,
3217                                         EXTENT_BOUNDARY);
3218                         nr++;
3219                 }
3220
3221                 ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
3222                                                 NULL, 0);
3223                 if (ret) {
3224                         unlock_page(page);
3225                         put_page(page);
3226                         btrfs_delalloc_release_metadata(BTRFS_I(inode),
3227                                                          PAGE_SIZE, true);
3228                         btrfs_delalloc_release_extents(BTRFS_I(inode),
3229                                                        PAGE_SIZE, true);
3230
3231                         clear_extent_bits(&BTRFS_I(inode)->io_tree,
3232                                           page_start, page_end,
3233                                           EXTENT_LOCKED | EXTENT_BOUNDARY);
3234                         goto out;
3235
3236                 }
3237                 set_page_dirty(page);
3238
3239                 unlock_extent(&BTRFS_I(inode)->io_tree,
3240                               page_start, page_end);
3241                 unlock_page(page);
3242                 put_page(page);
3243
3244                 index++;
3245                 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE,
3246                                                false);
3247                 balance_dirty_pages_ratelimited(inode->i_mapping);
3248                 btrfs_throttle(fs_info);
3249         }
3250         WARN_ON(nr != cluster->nr);
3251 out:
3252         kfree(ra);
3253         return ret;
3254 }
3255
3256 static noinline_for_stack
3257 int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
3258                          struct file_extent_cluster *cluster)
3259 {
3260         int ret;
3261
3262         if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
3263                 ret = relocate_file_extent_cluster(inode, cluster);
3264                 if (ret)
3265                         return ret;
3266                 cluster->nr = 0;
3267         }
3268
3269         if (!cluster->nr)
3270                 cluster->start = extent_key->objectid;
3271         else
3272                 BUG_ON(cluster->nr >= MAX_EXTENTS);
3273         cluster->end = extent_key->objectid + extent_key->offset - 1;
3274         cluster->boundary[cluster->nr] = extent_key->objectid;
3275         cluster->nr++;
3276
3277         if (cluster->nr >= MAX_EXTENTS) {
3278                 ret = relocate_file_extent_cluster(inode, cluster);
3279                 if (ret)
3280                         return ret;
3281                 cluster->nr = 0;
3282         }
3283         return 0;
3284 }
3285
3286 /*
3287  * helper to add a tree block to the list.
3288  * the major work is getting the generation and level of the block
3289  */
3290 static int add_tree_block(struct reloc_control *rc,
3291                           struct btrfs_key *extent_key,
3292                           struct btrfs_path *path,
3293                           struct rb_root *blocks)
3294 {
3295         struct extent_buffer *eb;
3296         struct btrfs_extent_item *ei;
3297         struct btrfs_tree_block_info *bi;
3298         struct tree_block *block;
3299         struct rb_node *rb_node;
3300         u32 item_size;
3301         int level = -1;
3302         u64 generation;
3303
3304         eb =  path->nodes[0];
3305         item_size = btrfs_item_size_nr(eb, path->slots[0]);
3306
3307         if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
3308             item_size >= sizeof(*ei) + sizeof(*bi)) {
3309                 ei = btrfs_item_ptr(eb, path->slots[0],
3310                                 struct btrfs_extent_item);
3311                 if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
3312                         bi = (struct btrfs_tree_block_info *)(ei + 1);
3313                         level = btrfs_tree_block_level(eb, bi);
3314                 } else {
3315                         level = (int)extent_key->offset;
3316                 }
3317                 generation = btrfs_extent_generation(eb, ei);
3318         } else {
3319                 BUG();
3320         }
3321
3322         btrfs_release_path(path);
3323
3324         BUG_ON(level == -1);
3325
3326         block = kmalloc(sizeof(*block), GFP_NOFS);
3327         if (!block)
3328                 return -ENOMEM;
3329
3330         block->bytenr = extent_key->objectid;
3331         block->key.objectid = rc->extent_root->fs_info->nodesize;
3332         block->key.offset = generation;
3333         block->level = level;
3334         block->key_ready = 0;
3335
3336         rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
3337         if (rb_node)
3338                 backref_tree_panic(rb_node, -EEXIST, block->bytenr);
3339
3340         return 0;
3341 }
3342
/*
 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
 *
 * Looks up the extent item for the tree block at @bytenr in the extent
 * tree (commit root) and, if found, queues the block into @blocks via
 * add_tree_block().  Returns 0 on success (including when the block was
 * already processed or queued), -ENOMEM on allocation failure, -EINVAL
 * when no matching extent item exists, or a negative errno from the
 * search.
 */
static int __add_tree_block(struct reloc_control *rc,
			    u64 bytenr, u32 blocksize,
			    struct rb_root *blocks)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;
	bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA);

	/* Already relocated during this pass. */
	if (tree_block_processed(bytenr, rc))
		return 0;

	/* Already queued for relocation. */
	if (tree_search(blocks, bytenr))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
again:
	key.objectid = bytenr;
	if (skinny) {
		/*
		 * Skinny metadata items store the level in the offset;
		 * search with offset -1 to land just past the item.
		 */
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = (u64)-1;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = blocksize;
	}

	/* Read-only search of the commit root; no tree locking needed. */
	path->search_commit_root = 1;
	path->skip_locking = 1;
	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	if (ret > 0 && skinny) {
		/*
		 * The (bytenr, METADATA_ITEM, -1) key never matches
		 * exactly; if the item exists it is in the previous slot.
		 */
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    (key.type == BTRFS_METADATA_ITEM_KEY ||
			     (key.type == BTRFS_EXTENT_ITEM_KEY &&
			      key.offset == blocksize)))
				ret = 0;
		}

		/*
		 * No skinny item: the extent may predate the
		 * SKINNY_METADATA feature, so retry with the full
		 * extent item key.
		 */
		if (ret) {
			skinny = false;
			btrfs_release_path(path);
			goto again;
		}
	}
	if (ret) {
		ASSERT(ret == 1);
		btrfs_print_leaf(path->nodes[0]);
		btrfs_err(fs_info,
	     "tree block extent item (%llu) is not found in extent tree",
		     bytenr);
		WARN_ON(1);
		ret = -EINVAL;
		goto out;
	}

	ret = add_tree_block(rc, &key, path, blocks);
out:
	btrfs_free_path(path);
	return ret;
}
3415
3416 /*
3417  * helper to check if the block use full backrefs for pointers in it
3418  */
3419 static int block_use_full_backref(struct reloc_control *rc,
3420                                   struct extent_buffer *eb)
3421 {
3422         u64 flags;
3423         int ret;
3424
3425         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
3426             btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
3427                 return 1;
3428
3429         ret = btrfs_lookup_extent_info(NULL, rc->extent_root->fs_info,
3430                                        eb->start, btrfs_header_level(eb), 1,
3431                                        NULL, &flags);
3432         BUG_ON(ret);
3433
3434         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
3435                 ret = 1;
3436         else
3437                 ret = 0;
3438         return ret;
3439 }
3440
3441 static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3442                                     struct btrfs_block_group_cache *block_group,
3443                                     struct inode *inode,
3444                                     u64 ino)
3445 {
3446         struct btrfs_key key;
3447         struct btrfs_root *root = fs_info->tree_root;
3448         struct btrfs_trans_handle *trans;
3449         int ret = 0;
3450
3451         if (inode)
3452                 goto truncate;
3453
3454         key.objectid = ino;
3455         key.type = BTRFS_INODE_ITEM_KEY;
3456         key.offset = 0;
3457
3458         inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3459         if (IS_ERR(inode) || is_bad_inode(inode)) {
3460                 if (!IS_ERR(inode))
3461                         iput(inode);
3462                 return -ENOENT;
3463         }
3464
3465 truncate:
3466         ret = btrfs_check_trunc_cache_free_space(fs_info,
3467                                                  &fs_info->global_block_rsv);
3468         if (ret)
3469                 goto out;
3470
3471         trans = btrfs_join_transaction(root);
3472         if (IS_ERR(trans)) {
3473                 ret = PTR_ERR(trans);
3474                 goto out;
3475         }
3476
3477         ret = btrfs_truncate_free_space_cache(trans, block_group, inode);
3478
3479         btrfs_end_transaction(trans);
3480         btrfs_btree_balance_dirty(fs_info);
3481 out:
3482         iput(inode);
3483         return ret;
3484 }
3485
3486 /*
3487  * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
3488  * this function scans fs tree to find blocks reference the data extent
3489  */
3490 static int find_data_references(struct reloc_control *rc,
3491                                 struct btrfs_key *extent_key,
3492                                 struct extent_buffer *leaf,
3493                                 struct btrfs_extent_data_ref *ref,
3494                                 struct rb_root *blocks)
3495 {
3496         struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3497         struct btrfs_path *path;
3498         struct tree_block *block;
3499         struct btrfs_root *root;
3500         struct btrfs_file_extent_item *fi;
3501         struct rb_node *rb_node;
3502         struct btrfs_key key;
3503         u64 ref_root;
3504         u64 ref_objectid;
3505         u64 ref_offset;
3506         u32 ref_count;
3507         u32 nritems;
3508         int err = 0;
3509         int added = 0;
3510         int counted;
3511         int ret;
3512
3513         ref_root = btrfs_extent_data_ref_root(leaf, ref);
3514         ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
3515         ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
3516         ref_count = btrfs_extent_data_ref_count(leaf, ref);
3517
3518         /*
3519          * This is an extent belonging to the free space cache, lets just delete
3520          * it and redo the search.
3521          */
3522         if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
3523                 ret = delete_block_group_cache(fs_info, rc->block_group,
3524                                                NULL, ref_objectid);
3525                 if (ret != -ENOENT)
3526                         return ret;
3527                 ret = 0;
3528         }
3529
3530         path = btrfs_alloc_path();
3531         if (!path)
3532                 return -ENOMEM;
3533         path->reada = READA_FORWARD;
3534
3535         root = read_fs_root(fs_info, ref_root);
3536         if (IS_ERR(root)) {
3537                 err = PTR_ERR(root);
3538                 goto out;
3539         }
3540
3541         key.objectid = ref_objectid;
3542         key.type = BTRFS_EXTENT_DATA_KEY;
3543         if (ref_offset > ((u64)-1 << 32))
3544                 key.offset = 0;
3545         else
3546                 key.offset = ref_offset;
3547
3548         path->search_commit_root = 1;
3549         path->skip_locking = 1;
3550         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3551         if (ret < 0) {
3552                 err = ret;
3553                 goto out;
3554         }
3555
3556         leaf = path->nodes[0];
3557         nritems = btrfs_header_nritems(leaf);
3558         /*
3559          * the references in tree blocks that use full backrefs
3560          * are not counted in
3561          */
3562         if (block_use_full_backref(rc, leaf))
3563                 counted = 0;
3564         else
3565                 counted = 1;
3566         rb_node = tree_search(blocks, leaf->start);
3567         if (rb_node) {
3568                 if (counted)
3569            &nb