fs/btrfs/ref-verify.c
/*
 * Copyright (C) 2014 Facebook.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/stacktrace.h>
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "delayed-ref.h"
#include "ref-verify.h"

/*
 * Used to keep track of the roots and number of refs each root has for a given
 * bytenr.  This just tracks the number of direct references, no shared
 * references.
 */
struct root_entry {
        u64 root_objectid;
        u64 num_refs;
        struct rb_node node;
};

/*
 * These are meant to represent what should exist in the extent tree; they can
 * be used to verify that the extent tree is consistent, as they should all
 * match what the extent tree says.
 */
struct ref_entry {
        u64 root_objectid;
        u64 parent;
        u64 owner;
        u64 offset;
        u64 num_refs;
        struct rb_node node;
};

#define MAX_TRACE       16

/*
 * Whenever we add/remove a reference we record the action.  The action maps
 * back to the delayed ref action.  We hold the ref we are changing in the
 * action so we can account for the history properly, and we record the root we
 * were called with since it could be different from ref_root.  We also store
 * stack traces because that's how I roll.
 */
struct ref_action {
        int action;
        u64 root;
        struct ref_entry ref;
        struct list_head list;
        unsigned long trace[MAX_TRACE];
        unsigned int trace_len;
};

/*
 * One of these for every block we reference; it holds the roots and references
 * to it as well as all of the ref actions that have occurred to it.  We never
 * free it until we unmount the file system in order to make sure
 * re-allocations are happening properly.
 */
struct block_entry {
        u64 bytenr;
        u64 len;
        u64 num_refs;
        int metadata;
        int from_disk;
        struct rb_root roots;
        struct rb_root refs;
        struct rb_node node;
        struct list_head actions;
};

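/*
 * Insert @be into the rb-tree at @root, keyed by bytenr.  Returns the
 * existing entry if one for this bytenr is already present, NULL if the
 * insert succeeded.
 */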
static struct block_entry *insert_block_entry(struct rb_root *root,
                                              struct block_entry *be)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent_node = NULL;
        struct block_entry *entry;

        while (*p) {
                parent_node = *p;
                entry = rb_entry(parent_node, struct block_entry, node);
                if (entry->bytenr > be->bytenr)
                        p = &(*p)->rb_left;
                else if (entry->bytenr < be->bytenr)
                        p = &(*p)->rb_right;
                else
                        return entry;
        }

        rb_link_node(&be->node, parent_node, p);
        rb_insert_color(&be->node, root);
        return NULL;
}

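/* Find the block_entry for @bytenr, or NULL if we aren't tracking it. */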
static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
{
        struct rb_node *n;
        struct block_entry *entry = NULL;

        n = root->rb_node;
        while (n) {
                entry = rb_entry(n, struct block_entry, node);
                if (entry->bytenr < bytenr)
                        n = n->rb_right;
                else if (entry->bytenr > bytenr)
                        n = n->rb_left;
                else
                        return entry;
        }
        return NULL;
}

static struct root_entry *insert_root_entry(struct rb_root *root,
                                            struct root_entry *re)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent_node = NULL;
        struct root_entry *entry;

        while (*p) {
                parent_node = *p;
                entry = rb_entry(parent_node, struct root_entry, node);
                if (entry->root_objectid > re->root_objectid)
                        p = &(*p)->rb_left;
                else if (entry->root_objectid < re->root_objectid)
                        p = &(*p)->rb_right;
                else
                        return entry;
        }

        rb_link_node(&re->node, parent_node, p);
        rb_insert_color(&re->node, root);
        return NULL;
}

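/*
 * rb-tree comparator for ref_entry: orders by root_objectid, then parent,
 * then owner, then offset.
 */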
static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
{
        if (ref1->root_objectid < ref2->root_objectid)
                return -1;
        if (ref1->root_objectid > ref2->root_objectid)
                return 1;
        if (ref1->parent < ref2->parent)
                return -1;
        if (ref1->parent > ref2->parent)
                return 1;
        if (ref1->owner < ref2->owner)
                return -1;
        if (ref1->owner > ref2->owner)
                return 1;
        if (ref1->offset < ref2->offset)
                return -1;
        if (ref1->offset > ref2->offset)
                return 1;
        return 0;
}

static struct ref_entry *insert_ref_entry(struct rb_root *root,
                                          struct ref_entry *ref)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent_node = NULL;
        struct ref_entry *entry;
        int cmp;

        while (*p) {
                parent_node = *p;
                entry = rb_entry(parent_node, struct ref_entry, node);
                cmp = comp_refs(entry, ref);
                if (cmp > 0)
                        p = &(*p)->rb_left;
                else if (cmp < 0)
                        p = &(*p)->rb_right;
                else
                        return entry;
        }

        rb_link_node(&ref->node, parent_node, p);
        rb_insert_color(&ref->node, root);
        return NULL;
}

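/* Find the root_entry for root @objectid, or NULL if it isn't tracked. */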
static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
{
        struct rb_node *n;
        struct root_entry *entry = NULL;

        n = root->rb_node;
        while (n) {
                entry = rb_entry(n, struct root_entry, node);
                if (entry->root_objectid < objectid)
                        n = n->rb_right;
                else if (entry->root_objectid > objectid)
                        n = n->rb_left;
                else
                        return entry;
        }
        return NULL;
}

#ifdef CONFIG_STACKTRACE
static void __save_stack_trace(struct ref_action *ra)
{
        struct stack_trace stack_trace;

        stack_trace.max_entries = MAX_TRACE;
        stack_trace.nr_entries = 0;
        stack_trace.entries = ra->trace;
        stack_trace.skip = 2;
        save_stack_trace(&stack_trace);
        ra->trace_len = stack_trace.nr_entries;
}

static void __print_stack_trace(struct btrfs_fs_info *fs_info,
                                struct ref_action *ra)
{
        struct stack_trace trace;

        if (ra->trace_len == 0) {
                btrfs_err(fs_info, "  ref-verify: no stacktrace");
                return;
        }
        trace.nr_entries = ra->trace_len;
        trace.entries = ra->trace;
        print_stack_trace(&trace, 2);
}
#else
static inline void __save_stack_trace(struct ref_action *ra)
{
}

static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
                                       struct ref_action *ra)
{
        btrfs_err(fs_info, "  ref-verify: no stacktrace support");
}
#endif

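/* Free a block_entry and all of the roots, refs, and actions attached to it. */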
static void free_block_entry(struct block_entry *be)
{
        struct root_entry *re;
        struct ref_entry *ref;
        struct ref_action *ra;
        struct rb_node *n;

        while ((n = rb_first(&be->roots))) {
                re = rb_entry(n, struct root_entry, node);
                rb_erase(&re->node, &be->roots);
                kfree(re);
        }

        while ((n = rb_first(&be->refs))) {
                ref = rb_entry(n, struct ref_entry, node);
                rb_erase(&ref->node, &be->refs);
                kfree(ref);
        }

        while (!list_empty(&be->actions)) {
                ra = list_first_entry(&be->actions, struct ref_action,
                                      list);
                list_del(&ra->list);
                kfree(ra);
        }
        kfree(be);
}

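/*
 * Allocate and insert the block_entry for @bytenr, or return the existing
 * one.  If @root_objectid is nonzero a root_entry for it is added to the
 * block's roots tree as well.  On success this returns with
 * fs_info->ref_verify_lock held; the caller is responsible for dropping it.
 */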
static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
                                           u64 bytenr, u64 len,
                                           u64 root_objectid)
{
        struct block_entry *be = NULL, *exist;
        struct root_entry *re = NULL;

        re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
        be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
        if (!be || !re) {
                kfree(re);
                kfree(be);
                return ERR_PTR(-ENOMEM);
        }
        be->bytenr = bytenr;
        be->len = len;

        re->root_objectid = root_objectid;
        re->num_refs = 0;

        spin_lock(&fs_info->ref_verify_lock);
        exist = insert_block_entry(&fs_info->block_tree, be);
        if (exist) {
                if (root_objectid) {
                        struct root_entry *exist_re;

                        exist_re = insert_root_entry(&exist->roots, re);
                        if (exist_re)
                                kfree(re);
                } else {
                        /* re was never used on this path, don't leak it */
                        kfree(re);
                }
                kfree(be);
                return exist;
        }

        be->num_refs = 0;
        be->metadata = 0;
        be->from_disk = 0;
        be->roots = RB_ROOT;
        be->refs = RB_ROOT;
        INIT_LIST_HEAD(&be->actions);
        if (root_objectid)
                insert_root_entry(&be->roots, re);
        else
                kfree(re);
        return be;
}

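/*
 * Record one tree block ref found on disk.  @ref_root is set for a keyed ref,
 * @parent for a shared ref; the callers pass exactly one of the two.  Drops
 * the ref_verify_lock taken by add_block_entry().
 */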
static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
                          u64 parent, u64 bytenr, int level)
{
        struct block_entry *be;
        struct root_entry *re;
        struct ref_entry *ref = NULL, *exist;

        ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
        if (!ref)
                return -ENOMEM;

        if (parent)
                ref->root_objectid = 0;
        else
                ref->root_objectid = ref_root;
        ref->parent = parent;
        ref->owner = level;
        ref->offset = 0;
        ref->num_refs = 1;

        be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
        if (IS_ERR(be)) {
                kfree(ref);
                return PTR_ERR(be);
        }
        be->num_refs++;
        be->from_disk = 1;
        be->metadata = 1;

        if (!parent) {
                ASSERT(ref_root);
                re = lookup_root_entry(&be->roots, ref_root);
                ASSERT(re);
                re->num_refs++;
        }
        exist = insert_ref_entry(&be->refs, ref);
        if (exist) {
                exist->num_refs++;
                kfree(ref);
        }
        spin_unlock(&fs_info->ref_verify_lock);

        return 0;
}

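/* Record one SHARED_DATA_REF item found on disk.  Drops the ref_verify_lock. */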
static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
                               u64 parent, u32 num_refs, u64 bytenr,
                               u64 num_bytes)
{
        struct block_entry *be;
        struct ref_entry *ref;

        ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
        if (!ref)
                return -ENOMEM;
        be = add_block_entry(fs_info, bytenr, num_bytes, 0);
        if (IS_ERR(be)) {
                kfree(ref);
                return PTR_ERR(be);
        }
        be->num_refs += num_refs;

        ref->parent = parent;
        ref->num_refs = num_refs;
        if (insert_ref_entry(&be->refs, ref)) {
                spin_unlock(&fs_info->ref_verify_lock);
                btrfs_err(fs_info, "existing shared ref when reading from disk?");
                kfree(ref);
                return -EINVAL;
        }
        spin_unlock(&fs_info->ref_verify_lock);
        return 0;
}

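/* Record one EXTENT_DATA_REF item found on disk.  Drops the ref_verify_lock. */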
static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
                               struct extent_buffer *leaf,
                               struct btrfs_extent_data_ref *dref,
                               u64 bytenr, u64 num_bytes)
{
        struct block_entry *be;
        struct ref_entry *ref;
        struct root_entry *re;
        u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
        u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
        u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
        u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);

        ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
        if (!ref)
                return -ENOMEM;
        be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
        if (IS_ERR(be)) {
                kfree(ref);
                return PTR_ERR(be);
        }
        be->num_refs += num_refs;

        ref->parent = 0;
        ref->owner = owner;
        ref->root_objectid = ref_root;
        ref->offset = offset;
        ref->num_refs = num_refs;
        if (insert_ref_entry(&be->refs, ref)) {
                spin_unlock(&fs_info->ref_verify_lock);
                btrfs_err(fs_info, "existing ref when reading from disk?");
                kfree(ref);
                return -EINVAL;
        }

        re = lookup_root_entry(&be->roots, ref_root);
        if (!re) {
                spin_unlock(&fs_info->ref_verify_lock);
                btrfs_err(fs_info, "missing root in new block entry?");
                return -EINVAL;
        }
        re->num_refs += num_refs;
        spin_unlock(&fs_info->ref_verify_lock);
        return 0;
}

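/*
 * Walk the inline refs of an extent/metadata item and record each of them,
 * remembering the tree block level for any metadata item we see along the
 * way so the standalone ref items that follow can use it.
 */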
static int process_extent_item(struct btrfs_fs_info *fs_info,
                               struct btrfs_path *path, struct btrfs_key *key,
                               int slot, int *tree_block_level)
{
        struct btrfs_extent_item *ei;
        struct btrfs_extent_inline_ref *iref;
        struct btrfs_extent_data_ref *dref;
        struct btrfs_shared_data_ref *sref;
        struct extent_buffer *leaf = path->nodes[0];
        u32 item_size = btrfs_item_size_nr(leaf, slot);
        unsigned long end, ptr;
        u64 offset, flags, count;
        int type, ret = 0;

        ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
        flags = btrfs_extent_flags(leaf, ei);

        if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
            flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                struct btrfs_tree_block_info *info;

                info = (struct btrfs_tree_block_info *)(ei + 1);
                *tree_block_level = btrfs_tree_block_level(leaf, info);
                iref = (struct btrfs_extent_inline_ref *)(info + 1);
        } else {
                if (key->type == BTRFS_METADATA_ITEM_KEY)
                        *tree_block_level = key->offset;
                iref = (struct btrfs_extent_inline_ref *)(ei + 1);
        }

        ptr = (unsigned long)iref;
        end = (unsigned long)ei + item_size;
        while (ptr < end) {
                iref = (struct btrfs_extent_inline_ref *)ptr;
                type = btrfs_extent_inline_ref_type(leaf, iref);
                offset = btrfs_extent_inline_ref_offset(leaf, iref);
                switch (type) {
                case BTRFS_TREE_BLOCK_REF_KEY:
                        ret = add_tree_block(fs_info, offset, 0, key->objectid,
                                             *tree_block_level);
                        break;
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        ret = add_tree_block(fs_info, 0, offset, key->objectid,
                                             *tree_block_level);
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY:
                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                        ret = add_extent_data_ref(fs_info, leaf, dref,
                                                  key->objectid, key->offset);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY:
                        sref = (struct btrfs_shared_data_ref *)(iref + 1);
                        count = btrfs_shared_data_ref_count(leaf, sref);
                        ret = add_shared_data_ref(fs_info, offset, count,
                                                  key->objectid, key->offset);
                        break;
                default:
                        btrfs_err(fs_info, "invalid key type in iref");
                        ret = -EINVAL;
                        break;
                }
                if (ret)
                        break;
                ptr += btrfs_extent_inline_ref_size(type);
        }
        return ret;
}

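/*
 * Record every extent item and standalone ref item found in @leaf, updating
 * *bytenr/*num_bytes as we go so ref items on a later leaf can still be
 * matched to the extent item they belong to.
 */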
static int process_leaf(struct btrfs_root *root,
                        struct btrfs_path *path, u64 *bytenr, u64 *num_bytes)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *leaf = path->nodes[0];
        struct btrfs_extent_data_ref *dref;
        struct btrfs_shared_data_ref *sref;
        u32 count;
        int i = 0, tree_block_level = 0, ret = 0;
        struct btrfs_key key;
        int nritems = btrfs_header_nritems(leaf);

        for (i = 0; i < nritems; i++) {
                btrfs_item_key_to_cpu(leaf, &key, i);
                switch (key.type) {
                case BTRFS_EXTENT_ITEM_KEY:
                        *num_bytes = key.offset;
                        /* fall through */
                case BTRFS_METADATA_ITEM_KEY:
                        *bytenr = key.objectid;
                        ret = process_extent_item(fs_info, path, &key, i,
                                                  &tree_block_level);
                        break;
                case BTRFS_TREE_BLOCK_REF_KEY:
                        ret = add_tree_block(fs_info, key.offset, 0,
                                             key.objectid, tree_block_level);
                        break;
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        ret = add_tree_block(fs_info, 0, key.offset,
                                             key.objectid, tree_block_level);
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY:
                        dref = btrfs_item_ptr(leaf, i,
                                              struct btrfs_extent_data_ref);
                        ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
                                                  *num_bytes);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY:
                        sref = btrfs_item_ptr(leaf, i,
                                              struct btrfs_shared_data_ref);
                        count = btrfs_shared_data_ref_count(leaf, sref);
                        ret = add_shared_data_ref(fs_info, key.offset, count,
                                                  *bytenr, *num_bytes);
                        break;
                default:
                        break;
                }
                if (ret)
                        break;
        }
        return ret;
}

/* Walk down to the leaf from the given level */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
                          int level, u64 *bytenr, u64 *num_bytes)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *eb;
        u64 block_bytenr, gen;
        int ret = 0;

        while (level >= 0) {
                if (level) {
                        block_bytenr = btrfs_node_blockptr(path->nodes[level],
                                                           path->slots[level]);
                        gen = btrfs_node_ptr_generation(path->nodes[level],
                                                        path->slots[level]);
                        eb = read_tree_block(fs_info, block_bytenr, gen);
                        if (IS_ERR(eb))
                                return PTR_ERR(eb);
                        if (!extent_buffer_uptodate(eb)) {
                                free_extent_buffer(eb);
                                return -EIO;
                        }
                        btrfs_tree_read_lock(eb);
                        btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
                        path->nodes[level-1] = eb;
                        path->slots[level-1] = 0;
                        path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
                } else {
                        ret = process_leaf(root, path, bytenr, num_bytes);
                        if (ret)
                                break;
                }
                level--;
        }
        return ret;
}

/* Walk up to the next node that needs to be processed */
static int walk_up_tree(struct btrfs_path *path, int *level)
{
        int l;

        for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
                if (!path->nodes[l])
                        continue;
                if (l) {
                        path->slots[l]++;
                        if (path->slots[l] <
                            btrfs_header_nritems(path->nodes[l])) {
                                *level = l;
                                return 0;
                        }
                }
                btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
                free_extent_buffer(path->nodes[l]);
                path->nodes[l] = NULL;
                path->slots[l] = 0;
                path->locks[l] = 0;
        }

        return 1;
}

static void dump_ref_action(struct btrfs_fs_info *fs_info,
                            struct ref_action *ra)
{
        btrfs_err(fs_info,
"  Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
                  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
                  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
        __print_stack_trace(fs_info, ra);
}

/*
 * Dumps all the information from the block entry to printk; it's going to be
 * awesome.
 */
static void dump_block_entry(struct btrfs_fs_info *fs_info,
                             struct block_entry *be)
{
        struct ref_entry *ref;
        struct root_entry *re;
        struct ref_action *ra;
        struct rb_node *n;

        btrfs_err(fs_info,
"dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
                  be->bytenr, be->len, be->num_refs, be->metadata,
                  be->from_disk);

        for (n = rb_first(&be->refs); n; n = rb_next(n)) {
                ref = rb_entry(n, struct ref_entry, node);
                btrfs_err(fs_info,
"  ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
                          ref->root_objectid, ref->parent, ref->owner,
                          ref->offset, ref->num_refs);
        }

        for (n = rb_first(&be->roots); n; n = rb_next(n)) {
                re = rb_entry(n, struct root_entry, node);
                btrfs_err(fs_info, "  root entry %llu, num_refs %llu",
                          re->root_objectid, re->num_refs);
        }

        list_for_each_entry(ra, &be->actions, list)
                dump_ref_action(fs_info, ra);
}

/*
 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
 * @root: the root we are making this modification from.
 * @bytenr: the bytenr we are modifying.
 * @num_bytes: number of bytes.
 * @parent: the parent bytenr.
 * @ref_root: the original root owner of the bytenr.
 * @owner: level in the case of metadata, inode in the case of data.
 * @offset: 0 for metadata, file offset for data.
 * @action: the action that we are doing, this is the same as the delayed ref
 *      action.
 *
 * This will add an action item to the given bytenr and do sanity checks to make
 * sure we haven't messed something up.  If we are making a new allocation and
 * this block entry has history we will delete all previous actions as long as
 * our sanity checks pass as they are no longer needed.
 */
int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
                       u64 parent, u64 ref_root, u64 owner, u64 offset,
                       int action)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct ref_entry *ref = NULL, *exist;
        struct ref_action *ra = NULL;
        struct block_entry *be = NULL;
        struct root_entry *re = NULL;
        int ret = 0;
        bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;

        if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
                return 0;

        ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
        ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
        if (!ra || !ref) {
                kfree(ref);
                kfree(ra);
                ret = -ENOMEM;
                goto out;
        }

        if (parent) {
                ref->parent = parent;
        } else {
                ref->root_objectid = ref_root;
                ref->owner = owner;
                ref->offset = offset;
        }
        ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;

        memcpy(&ra->ref, ref, sizeof(struct ref_entry));
        /*
         * Save the extra info from the delayed ref in the ref action to make
         * it easier to figure out what is happening.  The real refs we add to
         * the ref tree need to reflect what we save on disk so it matches any
         * on-disk refs we pre-loaded.
         */
        ra->ref.owner = owner;
        ra->ref.offset = offset;
        ra->ref.root_objectid = ref_root;
        __save_stack_trace(ra);

        INIT_LIST_HEAD(&ra->list);
        ra->action = action;
        ra->root = root->objectid;

        /*
         * This is an allocation, preallocate the block_entry in case we
         * haven't used it before.  Every sanity check failure below bails out
         * with the -EINVAL we set here.
         */
        ret = -EINVAL;
        if (action == BTRFS_ADD_DELAYED_EXTENT) {
                /*
                 * For subvol_create we'll just pass in whatever the parent root
                 * is and the new root objectid, so let's not treat the passed
                 * in root as if it really has a ref for this bytenr.
                 */
                be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root);
                if (IS_ERR(be)) {
                        kfree(ref);
                        kfree(ra);
                        ret = PTR_ERR(be);
                        goto out;
                }
                be->num_refs++;
                if (metadata)
                        be->metadata = 1;

                if (be->num_refs != 1) {
                        btrfs_err(fs_info,
                        "re-allocated a block that still has references to it!");
                        dump_block_entry(fs_info, be);
                        dump_ref_action(fs_info, ra);
                        /* neither ref nor ra were linked anywhere, free them */
                        kfree(ref);
                        kfree(ra);
                        goto out_unlock;
                }

                while (!list_empty(&be->actions)) {
                        struct ref_action *tmp;

                        tmp = list_first_entry(&be->actions, struct ref_action,
                                               list);
                        list_del(&tmp->list);
                        kfree(tmp);
                }
        } else {
                struct root_entry *tmp;

                if (!parent) {
                        re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
                        if (!re) {
                                kfree(ref);
                                kfree(ra);
                                ret = -ENOMEM;
                                goto out;
                        }
                        /*
                         * This is the root that is modifying us, so it's the
                         * one we want to lookup below when we modify the
                         * re->num_refs.
                         */
                        ref_root = root->objectid;
                        re->root_objectid = root->objectid;
                        re->num_refs = 0;
                }

                spin_lock(&root->fs_info->ref_verify_lock);
                be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
                if (!be) {
                        btrfs_err(fs_info,
"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
                                  action, (unsigned long long)bytenr,
                                  (unsigned long long)num_bytes);
                        dump_ref_action(fs_info, ra);
                        kfree(ref);
                        kfree(ra);
                        /* re was never inserted into be->roots, free it too */
                        kfree(re);
                        goto out_unlock;
                }

                if (!parent) {
                        tmp = insert_root_entry(&be->roots, re);
                        if (tmp) {
                                kfree(re);
                                re = tmp;
                        }
                }
        }

        exist = insert_ref_entry(&be->refs, ref);
        if (exist) {
                if (action == BTRFS_DROP_DELAYED_REF) {
                        if (exist->num_refs == 0) {
                                btrfs_err(fs_info,
"dropping a ref for an existing root that doesn't have a ref on the block");
                                dump_block_entry(fs_info, be);
                                dump_ref_action(fs_info, ra);
                                kfree(ref);
                                kfree(ra);
                                goto out_unlock;
                        }
                        exist->num_refs--;
                        if (exist->num_refs == 0) {
                                rb_erase(&exist->node, &be->refs);
                                kfree(exist);
                        }
                } else if (!be->metadata) {
                        exist->num_refs++;
                } else {
                        btrfs_err(fs_info,
"attempting to add another ref for an existing ref on a tree block");
                        dump_block_entry(fs_info, be);
                        dump_ref_action(fs_info, ra);
                        kfree(ref);
                        kfree(ra);
                        goto out_unlock;
                }
                kfree(ref);
        } else {
                if (action == BTRFS_DROP_DELAYED_REF) {
                        btrfs_err(fs_info,
"dropping a ref for a root that doesn't have a ref on the block");
                        dump_block_entry(fs_info, be);
                        dump_ref_action(fs_info, ra);
                        kfree(ra);
                        goto out_unlock;
                }
        }

        if (!parent && !re) {
                re = lookup_root_entry(&be->roots, ref_root);
                if (!re) {
                        /*
                         * This shouldn't happen because we will add our re
                         * above when we lookup the be with !parent, but catch
                         * it just in case so we don't panic because I didn't
                         * think of some other corner case.
                         */
                        btrfs_err(fs_info, "failed to find root %llu for %llu",
                                  root->objectid, be->bytenr);
                        dump_block_entry(fs_info, be);
                        dump_ref_action(fs_info, ra);
                        kfree(ra);
                        goto out_unlock;
                }
        }
        if (action == BTRFS_DROP_DELAYED_REF) {
                if (re)
                        re->num_refs--;
                be->num_refs--;
        } else if (action == BTRFS_ADD_DELAYED_REF) {
                be->num_refs++;
                if (re)
                        re->num_refs++;
        }
        list_add_tail(&ra->list, &be->actions);
        ret = 0;
out_unlock:
        spin_unlock(&root->fs_info->ref_verify_lock);
out:
        if (ret)
                btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
        return ret;
}
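
/*
 * Illustrative only: a call site hooking a new data ref into the verifier,
 * roughly in the shape of the ones in extent-tree.c, might look like the
 * following (the variable names here are placeholders, not the real call
 * sites):
 *
 *	ret = btrfs_ref_tree_mod(root, bytenr, num_bytes, 0, root->objectid,
 *				 owner_ino, file_offset,
 *				 BTRFS_ADD_DELAYED_REF);
 *	if (ret)
 *		return ret;
 */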

/* Free up the ref cache */
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
{
        struct block_entry *be;
        struct rb_node *n;

        if (!btrfs_test_opt(fs_info, REF_VERIFY))
                return;

        spin_lock(&fs_info->ref_verify_lock);
        while ((n = rb_first(&fs_info->block_tree))) {
                be = rb_entry(n, struct block_entry, node);
                rb_erase(&be->node, &fs_info->block_tree);
                free_block_entry(be);
                cond_resched_lock(&fs_info->ref_verify_lock);
        }
        spin_unlock(&fs_info->ref_verify_lock);
}

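/*
 * Drop every block entry that falls inside [start, start + len), complaining
 * about any entry that straddles the range boundary.  Used when a chunk of
 * the space (typically a block group being removed) goes away.
 */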
void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
                               u64 len)
{
        struct block_entry *be = NULL, *entry;
        struct rb_node *n;

        if (!btrfs_test_opt(fs_info, REF_VERIFY))
                return;

        spin_lock(&fs_info->ref_verify_lock);
        n = fs_info->block_tree.rb_node;
        while (n) {
                entry = rb_entry(n, struct block_entry, node);
                if (entry->bytenr < start) {
                        n = n->rb_right;
                } else if (entry->bytenr > start) {
                        n = n->rb_left;
                } else {
                        be = entry;
                        break;
                }
                /* We want to get as close to start as possible */
                if (be == NULL ||
                    (entry->bytenr < start && be->bytenr > start) ||
                    (entry->bytenr < start && entry->bytenr > be->bytenr))
                        be = entry;
        }

        /*
         * Could have an empty block group, maybe have something to check for
         * this case to verify we were actually empty?
         */
        if (!be) {
                spin_unlock(&fs_info->ref_verify_lock);
                return;
        }

        n = &be->node;
        while (n) {
                be = rb_entry(n, struct block_entry, node);
                n = rb_next(n);
                if (be->bytenr < start && be->bytenr + be->len > start) {
                        btrfs_err(fs_info,
                                "block entry overlaps a block group [%llu,%llu]!",
                                start, len);
                        dump_block_entry(fs_info, be);
                        continue;
                }
                if (be->bytenr < start)
                        continue;
                if (be->bytenr >= start + len)
                        break;
                if (be->bytenr + be->len > start + len) {
                        btrfs_err(fs_info,
                                "block entry overlaps a block group [%llu,%llu]!",
                                start, len);
                        dump_block_entry(fs_info, be);
                }
                rb_erase(&be->node, &fs_info->block_tree);
                free_block_entry(be);
        }
        spin_unlock(&fs_info->ref_verify_lock);
}

/* Walk down all roots and build the ref tree, meant to be called at mount */
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
        struct btrfs_path *path;
        struct extent_buffer *eb;
        u64 bytenr = 0, num_bytes = 0;
        int ret, level;

        if (!btrfs_test_opt(fs_info, REF_VERIFY))
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        eb = btrfs_read_lock_root_node(fs_info->extent_root);
        btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
        level = btrfs_header_level(eb);
        path->nodes[level] = eb;
        path->slots[level] = 0;
        path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

        while (1) {
                /*
                 * We have to keep track of the bytenr/num_bytes we last hit
                 * because we could have run out of space for an inline ref,
                 * and would have had to add a ref key item which may appear
                 * on a different leaf from the original extent item.
                 */
                ret = walk_down_tree(fs_info->extent_root, path, level,
                                     &bytenr, &num_bytes);
                if (ret)
                        break;
                ret = walk_up_tree(path, &level);
                if (ret < 0)
                        break;
                if (ret > 0) {
                        ret = 0;
                        break;
                }
        }
        if (ret) {
                btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
                btrfs_free_ref_cache(fs_info);
        }
        btrfs_free_path(path);
        return ret;
}