btrfs: trim: Check the range passed in to prevent overflow
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include <linux/lockdep.h>
#include <linux/crc32c.h>
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "math.h"
#include "sysfs.h"
#include "qgroup.h"
#include "ref-verify.h"
#include "space-info.h"
#include "block-rsv.h"
#include "delalloc-space.h"

#undef SCRAMBLE_DELAYED_REFS

static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                               struct btrfs_delayed_ref_node *node, u64 parent,
                               u64 root_objectid, u64 owner_objectid,
                               u64 owner_offset, int refs_to_drop,
                               struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
                                    struct extent_buffer *leaf,
                                    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
                                      u64 parent, u64 root_objectid,
                                      u64 flags, u64 owner, u64 offset,
                                      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
                                     struct btrfs_delayed_ref_node *node,
                                     struct btrfs_delayed_extent_op *extent_op);
static int find_next_key(struct btrfs_path *path, int level,
                         struct btrfs_key *key);

static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
        smp_mb();
        return cache->cached == BTRFS_CACHE_FINISHED ||
                cache->cached == BTRFS_CACHE_ERROR;
}

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
        return (cache->flags & bits) == bits;
}

void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
        atomic_inc(&cache->count);
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
        if (atomic_dec_and_test(&cache->count)) {
                WARN_ON(cache->pinned > 0);
                WARN_ON(cache->reserved > 0);

                /*
                 * If not empty, someone is still holding the mutex of
                 * full_stripe_lock, which can only be released by the
                 * caller.  Freeing the block group now would then cause a
                 * use-after-free when that caller releases the full stripe
                 * lock.  There is no better way to resolve this than to
                 * warn.
                 */
                WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
                kfree(cache->free_space_ctl);
                kfree(cache);
        }
}
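
/*
 * Illustrative sketch (not part of this file, never built thanks to #if 0):
 * the get/put refcounting pattern used by btrfs_get_block_group() and
 * btrfs_put_block_group() above, reduced to a hypothetical "widget" object.
 * Whichever caller drops the count to zero (atomic_dec_and_test() returns
 * true) owns the free.
 */
#if 0
struct widget {
        atomic_t count;
};

static void widget_get(struct widget *w)
{
        atomic_inc(&w->count);
}

static void widget_put(struct widget *w)
{
        /* atomic_dec_and_test() returns true only for the final reference */
        if (atomic_dec_and_test(&w->count))
                kfree(w);
}
#endif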

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
                                struct btrfs_block_group_cache *block_group)
{
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct btrfs_block_group_cache *cache;

        spin_lock(&info->block_group_cache_lock);
        p = &info->block_group_cache_tree.rb_node;

        while (*p) {
                parent = *p;
                cache = rb_entry(parent, struct btrfs_block_group_cache,
                                 cache_node);
                if (block_group->key.objectid < cache->key.objectid) {
                        p = &(*p)->rb_left;
                } else if (block_group->key.objectid > cache->key.objectid) {
                        p = &(*p)->rb_right;
                } else {
                        spin_unlock(&info->block_group_cache_lock);
                        return -EEXIST;
                }
        }

        rb_link_node(&block_group->cache_node, parent, p);
        rb_insert_color(&block_group->cache_node,
                        &info->block_group_cache_tree);

        if (info->first_logical_byte > block_group->key.objectid)
                info->first_logical_byte = block_group->key.objectid;

        spin_unlock(&info->block_group_cache_lock);

        return 0;
}
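
/*
 * Illustrative sketch (not built, #if 0): the canonical lib/rbtree.h insert
 * walk used by btrfs_add_block_group_cache() above, shown for a hypothetical
 * node keyed by a single u64.  Descend to a NULL child slot, then link and
 * rebalance with rb_link_node() + rb_insert_color().
 */
#if 0
struct demo_node {
        struct rb_node rb;
        u64 key;
};

static int demo_insert(struct rb_root *root, struct demo_node *new)
{
        struct rb_node **p = &root->rb_node, *parent = NULL;

        while (*p) {
                struct demo_node *cur = rb_entry(*p, struct demo_node, rb);

                parent = *p;
                if (new->key < cur->key)
                        p = &(*p)->rb_left;
                else if (new->key > cur->key)
                        p = &(*p)->rb_right;
                else
                        return -EEXIST; /* duplicate key, as above */
        }
        rb_link_node(&new->rb, parent, p);
        rb_insert_color(&new->rb, root);
        return 0;
}
#endif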

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
                              int contains)
{
        struct btrfs_block_group_cache *cache, *ret = NULL;
        struct rb_node *n;
        u64 end, start;

        spin_lock(&info->block_group_cache_lock);
        n = info->block_group_cache_tree.rb_node;

        while (n) {
                cache = rb_entry(n, struct btrfs_block_group_cache,
                                 cache_node);
                end = cache->key.objectid + cache->key.offset - 1;
                start = cache->key.objectid;

                if (bytenr < start) {
                        if (!contains && (!ret || start < ret->key.objectid))
                                ret = cache;
                        n = n->rb_left;
                } else if (bytenr > start) {
                        if (contains && bytenr <= end) {
                                ret = cache;
                                break;
                        }
                        n = n->rb_right;
                } else {
                        ret = cache;
                        break;
                }
        }
        if (ret) {
                btrfs_get_block_group(ret);
                if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
                        info->first_logical_byte = ret->key.objectid;
        }
        spin_unlock(&info->block_group_cache_lock);

        return ret;
}

static int add_excluded_extent(struct btrfs_fs_info *fs_info,
                               u64 start, u64 num_bytes)
{
        u64 end = start + num_bytes - 1;
        set_extent_bits(&fs_info->freed_extents[0],
                        start, end, EXTENT_UPTODATE);
        set_extent_bits(&fs_info->freed_extents[1],
                        start, end, EXTENT_UPTODATE);
        return 0;
}

static void free_excluded_extents(struct btrfs_block_group_cache *cache)
{
        struct btrfs_fs_info *fs_info = cache->fs_info;
        u64 start, end;

        start = cache->key.objectid;
        end = start + cache->key.offset - 1;

        clear_extent_bits(&fs_info->freed_extents[0],
                          start, end, EXTENT_UPTODATE);
        clear_extent_bits(&fs_info->freed_extents[1],
                          start, end, EXTENT_UPTODATE);
}

static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
{
        struct btrfs_fs_info *fs_info = cache->fs_info;
        u64 bytenr;
        u64 *logical;
        int stripe_len;
        int i, nr, ret;

        if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
                stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
                cache->bytes_super += stripe_len;
                ret = add_excluded_extent(fs_info, cache->key.objectid,
                                          stripe_len);
                if (ret)
                        return ret;
        }

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                ret = btrfs_rmap_block(fs_info, cache->key.objectid,
                                       bytenr, &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                while (nr--) {
                        u64 start, len;

                        if (logical[nr] > cache->key.objectid +
                            cache->key.offset)
                                continue;

                        if (logical[nr] + stripe_len <= cache->key.objectid)
                                continue;

                        start = logical[nr];
                        if (start < cache->key.objectid) {
                                start = cache->key.objectid;
                                len = (logical[nr] + stripe_len) - start;
                        } else {
                                len = min_t(u64, stripe_len,
                                            cache->key.objectid +
                                            cache->key.offset - start);
                        }

                        cache->bytes_super += len;
                        ret = add_excluded_extent(fs_info, start, len);
                        if (ret) {
                                kfree(logical);
                                return ret;
                        }
                }

                kfree(logical);
        }
        return 0;
}

static struct btrfs_caching_control *
get_caching_control(struct btrfs_block_group_cache *cache)
{
        struct btrfs_caching_control *ctl;

        spin_lock(&cache->lock);
        if (!cache->caching_ctl) {
                spin_unlock(&cache->lock);
                return NULL;
        }

        ctl = cache->caching_ctl;
        refcount_inc(&ctl->count);
        spin_unlock(&cache->lock);
        return ctl;
}

static void put_caching_control(struct btrfs_caching_control *ctl)
{
        if (refcount_dec_and_test(&ctl->count))
                kfree(ctl);
}

#ifdef CONFIG_BTRFS_DEBUG
static void fragment_free_space(struct btrfs_block_group_cache *block_group)
{
        struct btrfs_fs_info *fs_info = block_group->fs_info;
        u64 start = block_group->key.objectid;
        u64 len = block_group->key.offset;
        u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
                fs_info->nodesize : fs_info->sectorsize;
        u64 step = chunk << 1;

        while (len > chunk) {
                btrfs_remove_free_space(block_group, start, chunk);
                start += step;
                if (len < step)
                        len = 0;
                else
                        len -= step;
        }
}
#endif

/*
 * This is only called by cache_block_group.  Since we could have freed
 * extents, we need to check pinned_extents for any extents that can't be
 * used yet, since their free space will only be released once the
 * transaction commits.
 */
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
                       u64 start, u64 end)
{
        struct btrfs_fs_info *info = block_group->fs_info;
        u64 extent_start, extent_end, size, total_added = 0;
        int ret;

        while (start < end) {
                ret = find_first_extent_bit(info->pinned_extents, start,
                                            &extent_start, &extent_end,
                                            EXTENT_DIRTY | EXTENT_UPTODATE,
                                            NULL);
                if (ret)
                        break;

                if (extent_start <= start) {
                        start = extent_end + 1;
                } else if (extent_start > start && extent_start < end) {
                        size = extent_start - start;
                        total_added += size;
                        ret = btrfs_add_free_space(block_group, start,
                                                   size);
                        BUG_ON(ret); /* -ENOMEM or logic error */
                        start = extent_end + 1;
                } else {
                        break;
                }
        }

        if (start < end) {
                size = end - start;
                total_added += size;
                ret = btrfs_add_free_space(block_group, start, size);
                BUG_ON(ret); /* -ENOMEM or logic error */
        }

        return total_added;
}
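
/*
 * Illustrative sketch (not built, #if 0): the gap walk performed by
 * add_new_free_space() above, over a hypothetical sorted, non-overlapping
 * array of pinned [start, end] ranges instead of an extent_io tree.  Every
 * byte of [start, end) that is not pinned counts as free space.
 */
#if 0
struct pinned_range {
        u64 start;
        u64 end;        /* inclusive, like extent_end above */
};

static u64 demo_count_free(const struct pinned_range *pinned, int nr,
                           u64 start, u64 end)
{
        u64 total = 0;
        int i;

        for (i = 0; i < nr && start < end; i++) {
                if (pinned[i].end < start)
                        continue;               /* entirely before the window */
                if (pinned[i].start >= end)
                        break;                  /* entirely after the window */
                if (pinned[i].start > start)
                        total += pinned[i].start - start; /* gap is free */
                start = pinned[i].end + 1;      /* skip the pinned range */
        }
        if (start < end)
                total += end - start;           /* tail after the last range */
        return total;
}
#endif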

static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
{
        struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
        struct btrfs_fs_info *fs_info = block_group->fs_info;
        struct btrfs_root *extent_root = fs_info->extent_root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        u64 total_found = 0;
        u64 last = 0;
        u32 nritems;
        int ret;
        bool wakeup = true;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);

#ifdef CONFIG_BTRFS_DEBUG
        /*
         * If we're fragmenting we don't want to make anybody think we can
         * allocate from this block group until we've had a chance to fragment
         * the free space.
         */
        if (btrfs_should_fragment_free_space(block_group))
                wakeup = false;
#endif
        /*
         * We don't want to deadlock with somebody trying to allocate a new
         * extent for the extent root while also trying to search the extent
         * root to add free space.  So we skip locking and search the commit
         * root, since it's read-only.
         */
        path->skip_locking = 1;
        path->search_commit_root = 1;
        path->reada = READA_FORWARD;

        key.objectid = last;
        key.offset = 0;
        key.type = BTRFS_EXTENT_ITEM_KEY;

next:
        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0)
                goto out;

        leaf = path->nodes[0];
        nritems = btrfs_header_nritems(leaf);

        while (1) {
                if (btrfs_fs_closing(fs_info) > 1) {
                        last = (u64)-1;
                        break;
                }

                if (path->slots[0] < nritems) {
                        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                } else {
                        ret = find_next_key(path, 0, &key);
                        if (ret)
                                break;

                        if (need_resched() ||
                            rwsem_is_contended(&fs_info->commit_root_sem)) {
                                if (wakeup)
                                        caching_ctl->progress = last;
                                btrfs_release_path(path);
                                up_read(&fs_info->commit_root_sem);
                                mutex_unlock(&caching_ctl->mutex);
                                cond_resched();
                                mutex_lock(&caching_ctl->mutex);
                                down_read(&fs_info->commit_root_sem);
                                goto next;
                        }

                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
                                goto out;
                        if (ret)
                                break;
                        leaf = path->nodes[0];
                        nritems = btrfs_header_nritems(leaf);
                        continue;
                }

                if (key.objectid < last) {
                        key.objectid = last;
                        key.offset = 0;
                        key.type = BTRFS_EXTENT_ITEM_KEY;

                        if (wakeup)
                                caching_ctl->progress = last;
                        btrfs_release_path(path);
                        goto next;
                }

                if (key.objectid < block_group->key.objectid) {
                        path->slots[0]++;
                        continue;
                }

                if (key.objectid >= block_group->key.objectid +
                    block_group->key.offset)
                        break;

                if (key.type == BTRFS_EXTENT_ITEM_KEY ||
                    key.type == BTRFS_METADATA_ITEM_KEY) {
                        total_found += add_new_free_space(block_group, last,
                                                          key.objectid);
                        if (key.type == BTRFS_METADATA_ITEM_KEY)
                                last = key.objectid +
                                        fs_info->nodesize;
                        else
                                last = key.objectid + key.offset;

                        if (total_found > CACHING_CTL_WAKE_UP) {
                                total_found = 0;
                                if (wakeup)
                                        wake_up(&caching_ctl->wait);
                        }
                }
                path->slots[0]++;
        }
        ret = 0;

        total_found += add_new_free_space(block_group, last,
                                          block_group->key.objectid +
                                          block_group->key.offset);
        caching_ctl->progress = (u64)-1;

out:
        btrfs_free_path(path);
        return ret;
}

static noinline void caching_thread(struct btrfs_work *work)
{
        struct btrfs_block_group_cache *block_group;
        struct btrfs_fs_info *fs_info;
        struct btrfs_caching_control *caching_ctl;
        int ret;

        caching_ctl = container_of(work, struct btrfs_caching_control, work);
        block_group = caching_ctl->block_group;
        fs_info = block_group->fs_info;

        mutex_lock(&caching_ctl->mutex);
        down_read(&fs_info->commit_root_sem);

        if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
                ret = load_free_space_tree(caching_ctl);
        else
                ret = load_extent_tree_free(caching_ctl);

        spin_lock(&block_group->lock);
        block_group->caching_ctl = NULL;
        block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
        spin_unlock(&block_group->lock);

#ifdef CONFIG_BTRFS_DEBUG
        if (btrfs_should_fragment_free_space(block_group)) {
                u64 bytes_used;

                spin_lock(&block_group->space_info->lock);
                spin_lock(&block_group->lock);
                bytes_used = block_group->key.offset -
                        btrfs_block_group_used(&block_group->item);
                block_group->space_info->bytes_used += bytes_used >> 1;
                spin_unlock(&block_group->lock);
                spin_unlock(&block_group->space_info->lock);
                fragment_free_space(block_group);
        }
#endif

        caching_ctl->progress = (u64)-1;

        up_read(&fs_info->commit_root_sem);
        free_excluded_extents(block_group);
        mutex_unlock(&caching_ctl->mutex);

        wake_up(&caching_ctl->wait);

        put_caching_control(caching_ctl);
        btrfs_put_block_group(block_group);
}

static int cache_block_group(struct btrfs_block_group_cache *cache,
                             int load_cache_only)
{
        DEFINE_WAIT(wait);
        struct btrfs_fs_info *fs_info = cache->fs_info;
        struct btrfs_caching_control *caching_ctl;
        int ret = 0;

        caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
        if (!caching_ctl)
                return -ENOMEM;

        INIT_LIST_HEAD(&caching_ctl->list);
        mutex_init(&caching_ctl->mutex);
        init_waitqueue_head(&caching_ctl->wait);
        caching_ctl->block_group = cache;
        caching_ctl->progress = cache->key.objectid;
        refcount_set(&caching_ctl->count, 1);
        btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
                        caching_thread, NULL, NULL);

        spin_lock(&cache->lock);
        /*
         * This should be a rare occasion, but it could happen: one thread
         * starts to load the space cache info, and then some other thread
         * starts a transaction commit which tries to do an allocation while
         * the first thread is still loading the space cache info.  The
         * previous loop should have kept us from choosing this block group,
         * but if we've moved to the state where we will wait on caching
         * block groups we need to first check if we're doing a fast load
         * here, so we can wait for it to finish, otherwise we could end up
         * allocating from a block group whose cache gets evicted for one
         * reason or another.
         */
        while (cache->cached == BTRFS_CACHE_FAST) {
                struct btrfs_caching_control *ctl;

                ctl = cache->caching_ctl;
                refcount_inc(&ctl->count);
                prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
                spin_unlock(&cache->lock);

                schedule();

                finish_wait(&ctl->wait, &wait);
                put_caching_control(ctl);
                spin_lock(&cache->lock);
        }

        if (cache->cached != BTRFS_CACHE_NO) {
                spin_unlock(&cache->lock);
                kfree(caching_ctl);
                return 0;
        }
        WARN_ON(cache->caching_ctl);
        cache->caching_ctl = caching_ctl;
        cache->cached = BTRFS_CACHE_FAST;
        spin_unlock(&cache->lock);

        if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
                mutex_lock(&caching_ctl->mutex);
                ret = load_free_space_cache(cache);

                spin_lock(&cache->lock);
                if (ret == 1) {
                        cache->caching_ctl = NULL;
                        cache->cached = BTRFS_CACHE_FINISHED;
                        cache->last_byte_to_unpin = (u64)-1;
                        caching_ctl->progress = (u64)-1;
                } else {
                        if (load_cache_only) {
                                cache->caching_ctl = NULL;
                                cache->cached = BTRFS_CACHE_NO;
                        } else {
                                cache->cached = BTRFS_CACHE_STARTED;
                                cache->has_caching_ctl = 1;
                        }
                }
                spin_unlock(&cache->lock);
#ifdef CONFIG_BTRFS_DEBUG
                if (ret == 1 &&
                    btrfs_should_fragment_free_space(cache)) {
                        u64 bytes_used;

                        spin_lock(&cache->space_info->lock);
                        spin_lock(&cache->lock);
                        bytes_used = cache->key.offset -
                                btrfs_block_group_used(&cache->item);
                        cache->space_info->bytes_used += bytes_used >> 1;
                        spin_unlock(&cache->lock);
                        spin_unlock(&cache->space_info->lock);
                        fragment_free_space(cache);
                }
#endif
                mutex_unlock(&caching_ctl->mutex);

                wake_up(&caching_ctl->wait);
                if (ret == 1) {
                        put_caching_control(caching_ctl);
                        free_excluded_extents(cache);
                        return 0;
                }
        } else {
                /*
                 * We're either using the free space tree or no caching at all.
                 * Set cached to the appropriate value and wakeup any waiters.
                 */
                spin_lock(&cache->lock);
                if (load_cache_only) {
                        cache->caching_ctl = NULL;
                        cache->cached = BTRFS_CACHE_NO;
                } else {
                        cache->cached = BTRFS_CACHE_STARTED;
                        cache->has_caching_ctl = 1;
                }
                spin_unlock(&cache->lock);
                wake_up(&caching_ctl->wait);
        }

        if (load_cache_only) {
                put_caching_control(caching_ctl);
                return 0;
        }

        down_write(&fs_info->commit_root_sem);
        refcount_inc(&caching_ctl->count);
        list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
        up_write(&fs_info->commit_root_sem);

        btrfs_get_block_group(cache);

        btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);

        return ret;
}
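
/*
 * Illustrative sketch (not built, #if 0): the open-coded wait loop that
 * cache_block_group() uses while another thread is in BTRFS_CACHE_FAST,
 * reduced to a hypothetical "ready" flag.  prepare_to_wait() must be called
 * before the condition is re-checked so a concurrent wake_up() cannot be
 * lost between the check and schedule().
 */
#if 0
static void demo_wait_for(wait_queue_head_t *wq, bool *ready,
                          spinlock_t *lock)
{
        DEFINE_WAIT(wait);

        spin_lock(lock);
        while (!*ready) {
                prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
                spin_unlock(lock);
                schedule();
                finish_wait(wq, &wait);
                spin_lock(lock);
        }
        spin_unlock(lock);
}
#endif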

/*
 * return the block group that starts at or after bytenr
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
        return block_group_cache_tree_search(info, bytenr, 0);
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
                                                 struct btrfs_fs_info *info,
                                                 u64 bytenr)
{
        return block_group_cache_tree_search(info, bytenr, 1);
}

static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
{
        if (ref->type == BTRFS_REF_METADATA) {
                if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
                        return BTRFS_BLOCK_GROUP_SYSTEM;
                else
                        return BTRFS_BLOCK_GROUP_METADATA;
        }
        return BTRFS_BLOCK_GROUP_DATA;
}

static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
                             struct btrfs_ref *ref)
{
        struct btrfs_space_info *space_info;
        u64 flags = generic_ref_to_space_flags(ref);

        space_info = btrfs_find_space_info(fs_info, flags);
        ASSERT(space_info);
        percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len,
                    BTRFS_TOTAL_BYTES_PINNED_BATCH);
}

static void sub_pinned_bytes(struct btrfs_fs_info *fs_info,
                             struct btrfs_ref *ref)
{
        struct btrfs_space_info *space_info;
        u64 flags = generic_ref_to_space_flags(ref);

        space_info = btrfs_find_space_info(fs_info, flags);
        ASSERT(space_info);
        percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len,
                    BTRFS_TOTAL_BYTES_PINNED_BATCH);
}

/* simple helper to search for an existing data extent at a given offset */
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
{
        int ret;
        struct btrfs_key key;
        struct btrfs_path *path;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = start;
        key.offset = len;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
        btrfs_free_path(path);
        return ret;
}

/*
 * helper function to lookup reference count and flags of a tree block.
 *
 * The head node for a delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree.  The head node
 * may also store extent flags to set.  This way you can check what the
 * reference count and extent flags will be once all of the queued delayed
 * refs are run, without actually running them.
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
                             struct btrfs_fs_info *fs_info, u64 bytenr,
                             u64 offset, int metadata, u64 *refs, u64 *flags)
{
        struct btrfs_delayed_ref_head *head;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_path *path;
        struct btrfs_extent_item *ei;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        u32 item_size;
        u64 num_refs;
        u64 extent_flags;
        int ret;

        /*
         * If we don't have skinny metadata, don't bother doing anything
         * different
         */
        if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
                offset = fs_info->nodesize;
                metadata = 0;
        }

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        if (!trans) {
                path->skip_locking = 1;
                path->search_commit_root = 1;
        }

search_again:
        key.objectid = bytenr;
        key.offset = offset;
        if (metadata)
                key.type = BTRFS_METADATA_ITEM_KEY;
        else
                key.type = BTRFS_EXTENT_ITEM_KEY;

        ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
        if (ret < 0)
                goto out_free;

        if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
                if (path->slots[0]) {
                        path->slots[0]--;
                        btrfs_item_key_to_cpu(path->nodes[0], &key,
                                              path->slots[0]);
                        if (key.objectid == bytenr &&
                            key.type == BTRFS_EXTENT_ITEM_KEY &&
                            key.offset == fs_info->nodesize)
                                ret = 0;
                }
        }

        if (ret == 0) {
                leaf = path->nodes[0];
                item_size = btrfs_item_size_nr(leaf, path->slots[0]);
                if (item_size >= sizeof(*ei)) {
                        ei = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_extent_item);
                        num_refs = btrfs_extent_refs(leaf, ei);
                        extent_flags = btrfs_extent_flags(leaf, ei);
                } else {
                        ret = -EINVAL;
                        btrfs_print_v0_err(fs_info);
                        if (trans)
                                btrfs_abort_transaction(trans, ret);
                        else
                                btrfs_handle_fs_error(fs_info, ret, NULL);

                        goto out_free;
                }

                BUG_ON(num_refs == 0);
        } else {
                num_refs = 0;
                extent_flags = 0;
                ret = 0;
        }

        if (!trans)
                goto out;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
        head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
        if (head) {
                if (!mutex_trylock(&head->mutex)) {
                        refcount_inc(&head->refs);
                        spin_unlock(&delayed_refs->lock);

                        btrfs_release_path(path);

                        /*
                         * Mutex was contended, block until it's released and
                         * try again
                         */
                        mutex_lock(&head->mutex);
                        mutex_unlock(&head->mutex);
                        btrfs_put_delayed_ref_head(head);
                        goto search_again;
                }
                spin_lock(&head->lock);
                if (head->extent_op && head->extent_op->update_flags)
                        extent_flags |= head->extent_op->flags_to_set;
                else
                        BUG_ON(num_refs == 0);

                num_refs += head->ref_mod;
                spin_unlock(&head->lock);
                mutex_unlock(&head->mutex);
        }
        spin_unlock(&delayed_refs->lock);
out:
        WARN_ON(num_refs == 0);
        if (refs)
                *refs = num_refs;
        if (flags)
                *flags = extent_flags;
out_free:
        btrfs_free_path(path);
        return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs.  Implicit back refs are optimized
 * for pointers in non-shared tree blocks.  For a given pointer in a block,
 * back refs of this kind provide information about the block's owner tree
 * and the pointer's key.  This information allows us to find the block by
 * b-tree searching.  Full back refs are for pointers in tree blocks not
 * referenced by their owner trees.  The location of the tree block is
 * recorded in the back refs.  Actually full back refs are generic, and can
 * be used in all cases where implicit back refs are used.  The major
 * shortcoming of full back refs is their overhead: every time a tree block
 * gets COWed, we have to update the back refs entry for all pointers in it.
 *
 * For a newly allocated tree block, we use implicit back refs for
 * pointers in it.  This means most tree related operations only involve
 * implicit back refs.  For a tree block created in an old transaction, the
 * only way to drop a reference to it is to COW it.  So we can detect the
 * event that a tree block loses its owner tree's reference and do the
 * back refs conversion.
 *
 * When a tree block is COWed through a tree, there are four cases:
 *
 * The reference count of the block is one and the tree is the block's
 * owner tree.  Nothing to do in this case.
 *
 * The reference count of the block is one and the tree is not the
 * block's owner tree.  In this case, full back refs are used for pointers
 * in the block.  Remove these full back refs, add implicit back refs for
 * every pointer in the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * the block's owner tree.  In this case, implicit back refs are used for
 * pointers in the block.  Add full back refs for every pointer in the
 * block, increase lower level extents' reference counts.  The original
 * implicit back refs are carried over to the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * not the block's owner tree.  Add implicit back refs for every pointer in
 * the new block, increase lower level extents' reference count.
 *
 * Back Reference Key composing:
 *
 * The key objectid corresponds to the first byte in the extent,
 * the key type is used to differentiate between types of back refs.
 * There are different meanings of the key offset for different types
 * of back refs.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is the hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has a field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf.
 *
 * When a file extent is allocated, the implicit back refs are used.
 * The fields are filled in:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed during file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - different subvolumes
 *
 * Both the implicit back refs and the full back refs for tree blocks
 * only consist of a key.  The key offset for the implicit back refs is the
 * objectid of the block's owner tree.  The key offset for the full back
 * refs is the first byte of the parent block.
 *
 * When implicit back refs are used, information about the lowest key and
 * level of the tree block is required.  This information is stored in
 * the tree block info structure.
 */

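/*
 * Illustrative sketch (not built, #if 0): how the two kinds of data back
 * reference keys described above are composed.  An implicit backref hashes
 * (root, inode objectid, file offset) into key.offset; a full/shared
 * backref stores the parent block's bytenr there instead.  The helper
 * hash_extent_data_ref() is defined later in this file.
 */
#if 0
static void demo_compose_data_ref_keys(u64 bytenr, u64 parent, u64 root,
                                       u64 ino, u64 file_off)
{
        struct btrfs_key implicit, full;

        implicit.objectid = bytenr;     /* first byte of the extent */
        implicit.type = BTRFS_EXTENT_DATA_REF_KEY;
        implicit.offset = hash_extent_data_ref(root, ino, file_off);

        full.objectid = bytenr;
        full.type = BTRFS_SHARED_DATA_REF_KEY;
        full.offset = parent;           /* first byte of the parent block */
}
#endif
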
/*
 * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
 * is_data == BTRFS_REF_TYPE_DATA, data type is required,
 * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
 */
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
                                     struct btrfs_extent_inline_ref *iref,
                                     enum btrfs_inline_ref_type is_data)
{
        int type = btrfs_extent_inline_ref_type(eb, iref);
        u64 offset = btrfs_extent_inline_ref_offset(eb, iref);

        if (type == BTRFS_TREE_BLOCK_REF_KEY ||
            type == BTRFS_SHARED_BLOCK_REF_KEY ||
            type == BTRFS_SHARED_DATA_REF_KEY ||
            type == BTRFS_EXTENT_DATA_REF_KEY) {
                if (is_data == BTRFS_REF_TYPE_BLOCK) {
                        if (type == BTRFS_TREE_BLOCK_REF_KEY)
                                return type;
                        if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
                                ASSERT(eb->fs_info);
                                /*
                                 * Every shared one has parent tree
                                 * block, which must be aligned to
                                 * nodesize.
                                 */
                                if (offset &&
                                    IS_ALIGNED(offset, eb->fs_info->nodesize))
                                        return type;
                        }
                } else if (is_data == BTRFS_REF_TYPE_DATA) {
                        if (type == BTRFS_EXTENT_DATA_REF_KEY)
                                return type;
                        if (type == BTRFS_SHARED_DATA_REF_KEY) {
                                ASSERT(eb->fs_info);
                                /*
                                 * Every shared one has parent tree
                                 * block, which must be aligned to
                                 * nodesize.
                                 */
                                if (offset &&
                                    IS_ALIGNED(offset, eb->fs_info->nodesize))
                                        return type;
                        }
                } else {
                        ASSERT(is_data == BTRFS_REF_TYPE_ANY);
                        return type;
                }
        }

        btrfs_print_leaf((struct extent_buffer *)eb);
        btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
                  eb->start, type);
        WARN_ON(1);

        return BTRFS_REF_TYPE_INVALID;
}

static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
        u32 high_crc = ~(u32)0;
        u32 low_crc = ~(u32)0;
        __le64 lenum;

        lenum = cpu_to_le64(root_objectid);
        high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
        lenum = cpu_to_le64(owner);
        low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
        lenum = cpu_to_le64(offset);
        low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));

        return ((u64)high_crc << 31) ^ (u64)low_crc;
}
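
/*
 * Illustrative note + sketch (not built, #if 0): hash_extent_data_ref()
 * folds two independent CRC32C values into one u64 key offset.  The shift
 * by 31 (rather than 32) looks like an off-by-one but, to my knowledge, is
 * kept deliberately: the hash is baked into on-disk key offsets, so
 * "fixing" it would change the keys of existing filesystems.
 */
#if 0
static u64 demo_fold(u32 high_crc, u32 low_crc)
{
        /*
         * high_crc lands in bits 31..62, low_crc in bits 0..31; bit 31 of
         * each overlaps and is XORed together.
         */
        return ((u64)high_crc << 31) ^ (u64)low_crc;
}
#endif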

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
                                     struct btrfs_extent_data_ref *ref)
{
        return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
                                    btrfs_extent_data_ref_objectid(leaf, ref),
                                    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
                                 struct btrfs_extent_data_ref *ref,
                                 u64 root_objectid, u64 owner, u64 offset)
{
        if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
            btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
            btrfs_extent_data_ref_offset(leaf, ref) != offset)
                return 0;
        return 1;
}

static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
                                           struct btrfs_path *path,
                                           u64 bytenr, u64 parent,
                                           u64 root_objectid,
                                           u64 owner, u64 offset)
{
        struct btrfs_root *root = trans->fs_info->extent_root;
        struct btrfs_key key;
        struct btrfs_extent_data_ref *ref;
        struct extent_buffer *leaf;
        u32 nritems;
        int ret;
        int recow;
        int err = -ENOENT;

        key.objectid = bytenr;
        if (parent) {
                key.type = BTRFS_SHARED_DATA_REF_KEY;
                key.offset = parent;
        } else {
                key.type = BTRFS_EXTENT_DATA_REF_KEY;
                key.offset = hash_extent_data_ref(root_objectid,
                                                  owner, offset);
        }
again:
        recow = 0;
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0) {
                err = ret;
                goto fail;
        }

        if (parent) {
                if (!ret)
                        return 0;
                goto fail;
        }

        leaf = path->nodes[0];
        nritems = btrfs_header_nritems(leaf);
        while (1) {
                if (path->slots[0] >= nritems) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                err = ret;
                        if (ret)
                                goto fail;

                        leaf = path->nodes[0];
                        nritems = btrfs_header_nritems(leaf);
                        recow = 1;
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.objectid != bytenr ||
                    key.type != BTRFS_EXTENT_DATA_REF_KEY)
                        goto fail;

                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_data_ref);

                if (match_extent_data_ref(leaf, ref, root_objectid,
                                          owner, offset)) {
                        if (recow) {
                                btrfs_release_path(path);
                                goto again;
                        }
                        err = 0;
                        break;
                }
                path->slots[0]++;
        }
fail:
        return err;
}

static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
                                           struct btrfs_path *path,
                                           u64 bytenr, u64 parent,
                                           u64 root_objectid, u64 owner,
                                           u64 offset, int refs_to_add)
{
        struct btrfs_root *root = trans->fs_info->extent_root;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        u32 size;
        u32 num_refs;
        int ret;

        key.objectid = bytenr;
        if (parent) {
                key.type = BTRFS_SHARED_DATA_REF_KEY;
                key.offset = parent;
                size = sizeof(struct btrfs_shared_data_ref);
        } else {
                key.type = BTRFS_EXTENT_DATA_REF_KEY;
                key.offset = hash_extent_data_ref(root_objectid,
                                                  owner, offset);
                size = sizeof(struct btrfs_extent_data_ref);
        }

        ret = btrfs_insert_empty_item(trans, root, path, &key, size);
        if (ret && ret != -EEXIST)
                goto fail;

        leaf = path->nodes[0];
        if (parent) {
                struct btrfs_shared_data_ref *ref;
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_shared_data_ref);
                if (ret == 0) {
                        btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
                } else {
                        num_refs = btrfs_shared_data_ref_count(leaf, ref);
                        num_refs += refs_to_add;
                        btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
                }
        } else {
                struct btrfs_extent_data_ref *ref;
                while (ret == -EEXIST) {
                        ref = btrfs_item_ptr(leaf, path->slots[0],
                                             struct btrfs_extent_data_ref);
                        if (match_extent_data_ref(leaf, ref, root_objectid,
                                                  owner, offset))
                                break;
                        btrfs_release_path(path);
                        key.offset++;
                        ret = btrfs_insert_empty_item(trans, root, path, &key,
                                                      size);
                        if (ret && ret != -EEXIST)
                                goto fail;

                        leaf = path->nodes[0];
                }
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_data_ref);
                if (ret == 0) {
                        btrfs_set_extent_data_ref_root(leaf, ref,
                                                       root_objectid);
                        btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
                        btrfs_set_extent_data_ref_offset(leaf, ref, offset);
                        btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
                } else {
                        num_refs = btrfs_extent_data_ref_count(leaf, ref);
                        num_refs += refs_to_add;
                        btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
                }
        }
        btrfs_mark_buffer_dirty(leaf);
        ret = 0;
fail:
        btrfs_release_path(path);
        return ret;
}
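
/*
 * Illustrative sketch (not built, #if 0): the -EEXIST loop above is linear
 * probing.  When two distinct (root, owner, offset) tuples hash to the same
 * key.offset for the same extent, the insert retries at key.offset + 1
 * until it finds either the matching ref or an empty slot.  The helper
 * callbacks below are hypothetical, named only for this sketch.
 */
#if 0
static int demo_probe_insert(u64 start_off,
                             int (*try_insert)(u64 off), /* 0 or -EEXIST */
                             bool (*slot_matches)(u64 off))
{
        u64 off = start_off;
        int ret;

        while ((ret = try_insert(off)) == -EEXIST) {
                if (slot_matches(off))
                        return 0;       /* found the existing ref, reuse it */
                off++;                  /* collision: probe the next offset */
        }
        return ret;
}
#endif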

static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
                                           struct btrfs_path *path,
                                           int refs_to_drop, int *last_ref)
{
        struct btrfs_key key;
        struct btrfs_extent_data_ref *ref1 = NULL;
        struct btrfs_shared_data_ref *ref2 = NULL;
        struct extent_buffer *leaf;
        u32 num_refs = 0;
        int ret = 0;

        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

        if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
                ref1 = btrfs_item_ptr(leaf, path->slots[0],
                                      struct btrfs_extent_data_ref);
                num_refs = btrfs_extent_data_ref_count(leaf, ref1);
        } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
                ref2 = btrfs_item_ptr(leaf, path->slots[0],
                                      struct btrfs_shared_data_ref);
                num_refs = btrfs_shared_data_ref_count(leaf, ref2);
        } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
                btrfs_print_v0_err(trans->fs_info);
                btrfs_abort_transaction(trans, -EINVAL);
                return -EINVAL;
        } else {
                BUG();
        }

        BUG_ON(num_refs < refs_to_drop);
        num_refs -= refs_to_drop;

        if (num_refs == 0) {
                ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
                *last_ref = 1;
        } else {
                if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
                        btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
                else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
                        btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
                btrfs_mark_buffer_dirty(leaf);
        }
        return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_path *path,
                                          struct btrfs_extent_inline_ref *iref)
{
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_extent_data_ref *ref1;
        struct btrfs_shared_data_ref *ref2;
        u32 num_refs = 0;
        int type;

        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

        BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
        if (iref) {
                /*
                 * If type is invalid, we should have bailed out earlier than
                 * this call.
                 */
                type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
                ASSERT(type != BTRFS_REF_TYPE_INVALID);
                if (type == BTRFS_EXTENT_DATA_REF_KEY) {
                        ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
                        num_refs = btrfs_extent_data_ref_count(leaf, ref1);
                } else {
                        ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
                        num_refs = btrfs_shared_data_ref_count(leaf, ref2);
                }
        } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
                ref1 = btrfs_item_ptr(leaf, path->slots[0],
                                      struct btrfs_extent_data_ref);
                num_refs = btrfs_extent_data_ref_count(leaf, ref1);
        } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
                ref2 = btrfs_item_ptr(leaf, path->slots[0],
                                      struct btrfs_shared_data_ref);
                num_refs = btrfs_shared_data_ref_count(leaf, ref2);
        } else {
                WARN_ON(1);
        }
        return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
                                          struct btrfs_path *path,
                                          u64 bytenr, u64 parent,
                                          u64 root_objectid)
{
        struct btrfs_root *root = trans->fs_info->extent_root;
        struct btrfs_key key;
        int ret;

        key.objectid = bytenr;
        if (parent) {
                key.type = BTRFS_SHARED_BLOCK_REF_KEY;
                key.offset = parent;
        } else {
                key.type = BTRFS_TREE_BLOCK_REF_KEY;
                key.offset = root_objectid;
        }

        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret > 0)
                ret = -ENOENT;
        return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
                                          struct btrfs_path *path,
                                          u64 bytenr, u64 parent,
                                          u64 root_objectid)
{
        struct btrfs_key key;
        int ret;

        key.objectid = bytenr;
        if (parent) {
                key.type = BTRFS_SHARED_BLOCK_REF_KEY;
                key.offset = parent;
        } else {
                key.type = BTRFS_TREE_BLOCK_REF_KEY;
                key.offset = root_objectid;
        }

        ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
                                      path, &key, 0);
        btrfs_release_path(path);
        return ret;
}

static inline int extent_ref_type(u64 parent, u64 owner)
{
        int type;
        if (owner < BTRFS_FIRST_FREE_OBJECTID) {
                if (parent > 0)
                        type = BTRFS_SHARED_BLOCK_REF_KEY;
                else
                        type = BTRFS_TREE_BLOCK_REF_KEY;
        } else {
                if (parent > 0)
                        type = BTRFS_SHARED_DATA_REF_KEY;
                else
                        type = BTRFS_EXTENT_DATA_REF_KEY;
        }
        return type;
}
1389
1390 static int find_next_key(struct btrfs_path *path, int level,
1391                          struct btrfs_key *key)
1393 {
1394         for (; level < BTRFS_MAX_LEVEL; level++) {
1395                 if (!path->nodes[level])
1396                         break;
1397                 if (path->slots[level] + 1 >=
1398                     btrfs_header_nritems(path->nodes[level]))
1399                         continue;
1400                 if (level == 0)
1401                         btrfs_item_key_to_cpu(path->nodes[level], key,
1402                                               path->slots[level] + 1);
1403                 else
1404                         btrfs_node_key_to_cpu(path->nodes[level], key,
1405                                               path->slots[level] + 1);
1406                 return 0;
1407         }
1408         return 1;
1409 }
1410
1411 /*
1412  * Look for the inline back ref. If the back ref is found, *ref_ret is set
1413  * to the address of the inline back ref, and 0 is returned.
1414  *
1415  * If the back ref isn't found, *ref_ret is set to the address where it
1416  * should be inserted, and -ENOENT is returned.
1417  *
1418  * If insert is true and there are too many inline back refs, the path
1419  * points to the extent item, and -EAGAIN is returned.
1420  *
1421  * NOTE: inline back refs are ordered in the same way that back ref
1422  *       items in the tree are ordered.
1423  */
1424 static noinline_for_stack
1425 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1426                                  struct btrfs_path *path,
1427                                  struct btrfs_extent_inline_ref **ref_ret,
1428                                  u64 bytenr, u64 num_bytes,
1429                                  u64 parent, u64 root_objectid,
1430                                  u64 owner, u64 offset, int insert)
1431 {
1432         struct btrfs_fs_info *fs_info = trans->fs_info;
1433         struct btrfs_root *root = fs_info->extent_root;
1434         struct btrfs_key key;
1435         struct extent_buffer *leaf;
1436         struct btrfs_extent_item *ei;
1437         struct btrfs_extent_inline_ref *iref;
1438         u64 flags;
1439         u64 item_size;
1440         unsigned long ptr;
1441         unsigned long end;
1442         int extra_size;
1443         int type;
1444         int want;
1445         int ret;
1446         int err = 0;
1447         bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
1448         int needed;
1449
1450         key.objectid = bytenr;
1451         key.type = BTRFS_EXTENT_ITEM_KEY;
1452         key.offset = num_bytes;
1453
1454         want = extent_ref_type(parent, owner);
1455         if (insert) {
1456                 extra_size = btrfs_extent_inline_ref_size(want);
1457                 path->keep_locks = 1;
1458         } else
1459                 extra_size = -1;
1460
1461         /*
1462          * Owner is our level, so we can just add one to get the level for the
1463          * block we are interested in.
1464          */
1465         if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
1466                 key.type = BTRFS_METADATA_ITEM_KEY;
1467                 key.offset = owner;
1468         }
1469
1470 again:
1471         ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1472         if (ret < 0) {
1473                 err = ret;
1474                 goto out;
1475         }
1476
1477         /*
1478          * We may be a newly converted file system which still has the old fat
1479          * extent entries for metadata, so try to see if we have one of those.
1480          */
1481         if (ret > 0 && skinny_metadata) {
1482                 skinny_metadata = false;
1483                 if (path->slots[0]) {
1484                         path->slots[0]--;
1485                         btrfs_item_key_to_cpu(path->nodes[0], &key,
1486                                               path->slots[0]);
1487                         if (key.objectid == bytenr &&
1488                             key.type == BTRFS_EXTENT_ITEM_KEY &&
1489                             key.offset == num_bytes)
1490                                 ret = 0;
1491                 }
1492                 if (ret) {
1493                         key.objectid = bytenr;
1494                         key.type = BTRFS_EXTENT_ITEM_KEY;
1495                         key.offset = num_bytes;
1496                         btrfs_release_path(path);
1497                         goto again;
1498                 }
1499         }
1500
1501         if (ret && !insert) {
1502                 err = -ENOENT;
1503                 goto out;
1504         } else if (WARN_ON(ret)) {
1505                 err = -EIO;
1506                 goto out;
1507         }
1508
1509         leaf = path->nodes[0];
1510         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1511         if (unlikely(item_size < sizeof(*ei))) {
1512                 err = -EINVAL;
1513                 btrfs_print_v0_err(fs_info);
1514                 btrfs_abort_transaction(trans, err);
1515                 goto out;
1516         }
1517
1518         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1519         flags = btrfs_extent_flags(leaf, ei);
1520
1521         ptr = (unsigned long)(ei + 1);
1522         end = (unsigned long)ei + item_size;
1523
1524         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
1525                 ptr += sizeof(struct btrfs_tree_block_info);
1526                 BUG_ON(ptr > end);
1527         }
1528
1529         if (owner >= BTRFS_FIRST_FREE_OBJECTID)
1530                 needed = BTRFS_REF_TYPE_DATA;
1531         else
1532                 needed = BTRFS_REF_TYPE_BLOCK;
1533
1534         err = -ENOENT;
1535         while (1) {
1536                 if (ptr >= end) {
1537                         WARN_ON(ptr > end);
1538                         break;
1539                 }
1540                 iref = (struct btrfs_extent_inline_ref *)ptr;
1541                 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
1542                 if (type == BTRFS_REF_TYPE_INVALID) {
1543                         err = -EUCLEAN;
1544                         goto out;
1545                 }
1546
1547                 if (want < type)
1548                         break;
1549                 if (want > type) {
1550                         ptr += btrfs_extent_inline_ref_size(type);
1551                         continue;
1552                 }
1553
1554                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1555                         struct btrfs_extent_data_ref *dref;
1556                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1557                         if (match_extent_data_ref(leaf, dref, root_objectid,
1558                                                   owner, offset)) {
1559                                 err = 0;
1560                                 break;
1561                         }
1562                         if (hash_extent_data_ref_item(leaf, dref) <
1563                             hash_extent_data_ref(root_objectid, owner, offset))
1564                                 break;
1565                 } else {
1566                         u64 ref_offset;
1567                         ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1568                         if (parent > 0) {
1569                                 if (parent == ref_offset) {
1570                                         err = 0;
1571                                         break;
1572                                 }
1573                                 if (ref_offset < parent)
1574                                         break;
1575                         } else {
1576                                 if (root_objectid == ref_offset) {
1577                                         err = 0;
1578                                         break;
1579                                 }
1580                                 if (ref_offset < root_objectid)
1581                                         break;
1582                         }
1583                 }
1584                 ptr += btrfs_extent_inline_ref_size(type);
1585         }
1586         if (err == -ENOENT && insert) {
1587                 if (item_size + extra_size >=
1588                     BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1589                         err = -EAGAIN;
1590                         goto out;
1591                 }
1592                 /*
1593                  * To add new inline back ref, we have to make sure
1594                  * there is no corresponding back ref item.
1595                  * For simplicity, we just do not add new inline back
1596                  * ref if there is any kind of item for this block
1597                  */
1598                 if (find_next_key(path, 0, &key) == 0 &&
1599                     key.objectid == bytenr &&
1600                     key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1601                         err = -EAGAIN;
1602                         goto out;
1603                 }
1604         }
1605         *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1606 out:
1607         if (insert) {
1608                 path->keep_locks = 0;
1609                 btrfs_unlock_up_safe(path, 1);
1610         }
1611         return err;
1612 }
1613
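/*
 * Editor-added sketch (not part of the original source) of how a caller is
 * expected to branch on the three outcomes documented above; see
 * insert_inline_extent_backref() and __btrfs_inc_extent_ref() below for the
 * real in-tree users.  The "..." argument lists are elided:
 *
 *	ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
 *					   num_bytes, parent, root_objectid,
 *					   owner, offset, 1);
 *	if (ret == 0)
 *		update_inline_extent_backref(...);	found: bump in place
 *	else if (ret == -ENOENT)
 *		setup_inline_extent_backref(...);	iref = insertion point
 *	else if (ret == -EAGAIN)
 *		insert_extent_backref(...);		no room for inline ref
 */
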
1614 /*
1615  * helper to add a new inline back ref
1616  */
1617 static noinline_for_stack
1618 void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
1619                                  struct btrfs_path *path,
1620                                  struct btrfs_extent_inline_ref *iref,
1621                                  u64 parent, u64 root_objectid,
1622                                  u64 owner, u64 offset, int refs_to_add,
1623                                  struct btrfs_delayed_extent_op *extent_op)
1624 {
1625         struct extent_buffer *leaf;
1626         struct btrfs_extent_item *ei;
1627         unsigned long ptr;
1628         unsigned long end;
1629         unsigned long item_offset;
1630         u64 refs;
1631         int size;
1632         int type;
1633
1634         leaf = path->nodes[0];
1635         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1636         item_offset = (unsigned long)iref - (unsigned long)ei;
1637
1638         type = extent_ref_type(parent, owner);
1639         size = btrfs_extent_inline_ref_size(type);
1640
1641         btrfs_extend_item(path, size);
1642
1643         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1644         refs = btrfs_extent_refs(leaf, ei);
1645         refs += refs_to_add;
1646         btrfs_set_extent_refs(leaf, ei, refs);
1647         if (extent_op)
1648                 __run_delayed_extent_op(extent_op, leaf, ei);
1649
1650         ptr = (unsigned long)ei + item_offset;
1651         end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1652         if (ptr < end - size)
1653                 memmove_extent_buffer(leaf, ptr + size, ptr,
1654                                       end - size - ptr);
1655
1656         iref = (struct btrfs_extent_inline_ref *)ptr;
1657         btrfs_set_extent_inline_ref_type(leaf, iref, type);
1658         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1659                 struct btrfs_extent_data_ref *dref;
1660                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1661                 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1662                 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1663                 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1664                 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1665         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1666                 struct btrfs_shared_data_ref *sref;
1667                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1668                 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1669                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1670         } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1671                 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1672         } else {
1673                 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1674         }
1675         btrfs_mark_buffer_dirty(leaf);
1676 }
1677
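/*
 * Editor-added illustration of the item surgery performed above when a new
 * inline ref of 'size' bytes must land between existing refs A and B to
 * keep the sort order:
 *
 *	before:		[extent item][ref A][ref B]
 *	extend_item:	[extent item][ref A][ref B][gap ]
 *	memmove:	[extent item][ref A][gap ][ref B]
 *	fill in:	[extent item][ref A][new ][ref B]
 *
 * This preserves the ordering that lookup_inline_extent_backref() relies on
 * when it scans the inline refs.
 */
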
1678 static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1679                                  struct btrfs_path *path,
1680                                  struct btrfs_extent_inline_ref **ref_ret,
1681                                  u64 bytenr, u64 num_bytes, u64 parent,
1682                                  u64 root_objectid, u64 owner, u64 offset)
1683 {
1684         int ret;
1685
1686         ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
1687                                            num_bytes, parent, root_objectid,
1688                                            owner, offset, 0);
1689         if (ret != -ENOENT)
1690                 return ret;
1691
1692         btrfs_release_path(path);
1693         *ref_ret = NULL;
1694
1695         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1696                 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
1697                                             root_objectid);
1698         } else {
1699                 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
1700                                              root_objectid, owner, offset);
1701         }
1702         return ret;
1703 }
1704
1705 /*
1706  * helper to update/remove an inline back ref
1707  */
1708 static noinline_for_stack
1709 void update_inline_extent_backref(struct btrfs_path *path,
1710                                   struct btrfs_extent_inline_ref *iref,
1711                                   int refs_to_mod,
1712                                   struct btrfs_delayed_extent_op *extent_op,
1713                                   int *last_ref)
1714 {
1715         struct extent_buffer *leaf = path->nodes[0];
1716         struct btrfs_extent_item *ei;
1717         struct btrfs_extent_data_ref *dref = NULL;
1718         struct btrfs_shared_data_ref *sref = NULL;
1719         unsigned long ptr;
1720         unsigned long end;
1721         u32 item_size;
1722         int size;
1723         int type;
1724         u64 refs;
1725
1726         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1727         refs = btrfs_extent_refs(leaf, ei);
1728         WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1729         refs += refs_to_mod;
1730         btrfs_set_extent_refs(leaf, ei, refs);
1731         if (extent_op)
1732                 __run_delayed_extent_op(extent_op, leaf, ei);
1733
1734         /*
1735          * If type is invalid, we should have bailed out after
1736          * lookup_inline_extent_backref().
1737          */
1738         type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
1739         ASSERT(type != BTRFS_REF_TYPE_INVALID);
1740
1741         if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1742                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1743                 refs = btrfs_extent_data_ref_count(leaf, dref);
1744         } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1745                 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1746                 refs = btrfs_shared_data_ref_count(leaf, sref);
1747         } else {
1748                 refs = 1;
1749                 BUG_ON(refs_to_mod != -1);
1750         }
1751
1752         BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1753         refs += refs_to_mod;
1754
1755         if (refs > 0) {
1756                 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1757                         btrfs_set_extent_data_ref_count(leaf, dref, refs);
1758                 else
1759                         btrfs_set_shared_data_ref_count(leaf, sref, refs);
1760         } else {
1761                 *last_ref = 1;
1762                 size = btrfs_extent_inline_ref_size(type);
1763                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1764                 ptr = (unsigned long)iref;
1765                 end = (unsigned long)ei + item_size;
1766                 if (ptr + size < end)
1767                         memmove_extent_buffer(leaf, ptr, ptr + size,
1768                                               end - ptr - size);
1769                 item_size -= size;
1770                 btrfs_truncate_item(path, item_size, 1);
1771         }
1772         btrfs_mark_buffer_dirty(leaf);
1773 }
1774
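/*
 * Editor-added illustration of the removal branch above (refs dropped to
 * zero), deleting the middle of three inline refs of 'size' bytes each:
 *
 *	before:		[extent item][ref A][ref B][ref C]
 *	memmove:	[extent item][ref A][ref C][....]	C copied down
 *	truncate_item:	[extent item][ref A][ref C]
 *
 * When the dying ref is already last, ptr + size == end, so the memmove is
 * skipped and the truncation alone removes it.
 */
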
1775 static noinline_for_stack
1776 int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1777                                  struct btrfs_path *path,
1778                                  u64 bytenr, u64 num_bytes, u64 parent,
1779                                  u64 root_objectid, u64 owner,
1780                                  u64 offset, int refs_to_add,
1781                                  struct btrfs_delayed_extent_op *extent_op)
1782 {
1783         struct btrfs_extent_inline_ref *iref;
1784         int ret;
1785
1786         ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1787                                            num_bytes, parent, root_objectid,
1788                                            owner, offset, 1);
1789         if (ret == 0) {
1790                 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1791                 update_inline_extent_backref(path, iref, refs_to_add,
1792                                              extent_op, NULL);
1793         } else if (ret == -ENOENT) {
1794                 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
1795                                             root_objectid, owner, offset,
1796                                             refs_to_add, extent_op);
1797                 ret = 0;
1798         }
1799         return ret;
1800 }
1801
1802 static int insert_extent_backref(struct btrfs_trans_handle *trans,
1803                                  struct btrfs_path *path,
1804                                  u64 bytenr, u64 parent, u64 root_objectid,
1805                                  u64 owner, u64 offset, int refs_to_add)
1806 {
1807         int ret;
1808         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1809                 BUG_ON(refs_to_add != 1);
1810                 ret = insert_tree_block_ref(trans, path, bytenr, parent,
1811                                             root_objectid);
1812         } else {
1813                 ret = insert_extent_data_ref(trans, path, bytenr, parent,
1814                                              root_objectid, owner, offset,
1815                                              refs_to_add);
1816         }
1817         return ret;
1818 }
1819
1820 static int remove_extent_backref(struct btrfs_trans_handle *trans,
1821                                  struct btrfs_path *path,
1822                                  struct btrfs_extent_inline_ref *iref,
1823                                  int refs_to_drop, int is_data, int *last_ref)
1824 {
1825         int ret = 0;
1826
1827         BUG_ON(!is_data && refs_to_drop != 1);
1828         if (iref) {
1829                 update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
1830                                              last_ref);
1831         } else if (is_data) {
1832                 ret = remove_extent_data_ref(trans, path, refs_to_drop,
1833                                              last_ref);
1834         } else {
1835                 *last_ref = 1;
1836                 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
1837         }
1838         return ret;
1839 }
1840
1841 static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1842                                u64 *discarded_bytes)
1843 {
1844         int j, ret = 0;
1845         u64 bytes_left, end;
1846         u64 aligned_start = ALIGN(start, 1 << 9);
1847
1848         if (WARN_ON(start != aligned_start)) {
1849                 len -= aligned_start - start;
1850                 len = round_down(len, 1 << 9);
1851                 start = aligned_start;
1852         }
1853
1854         *discarded_bytes = 0;
1855
1856         if (!len)
1857                 return 0;
1858
1859         end = start + len;
1860         bytes_left = len;
1861
1862         /* Skip any superblocks on this device. */
1863         for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
1864                 u64 sb_start = btrfs_sb_offset(j);
1865                 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
1866                 u64 size = sb_start - start;
1867
1868                 if (!in_range(sb_start, start, bytes_left) &&
1869                     !in_range(sb_end, start, bytes_left) &&
1870                     !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
1871                         continue;
1872
1873                 /*
1874                  * Superblock spans beginning of range.  Adjust start and
1875                  * try again.
1876                  */
1877                 if (sb_start <= start) {
1878                         start += sb_end - start;
1879                         if (start > end) {
1880                                 bytes_left = 0;
1881                                 break;
1882                         }
1883                         bytes_left = end - start;
1884                         continue;
1885                 }
1886
1887                 if (size) {
1888                         ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
1889                                                    GFP_NOFS, 0);
1890                         if (!ret)
1891                                 *discarded_bytes += size;
1892                         else if (ret != -EOPNOTSUPP)
1893                                 return ret;
1894                 }
1895
1896                 start = sb_end;
1897                 if (start > end) {
1898                         bytes_left = 0;
1899                         break;
1900                 }
1901                 bytes_left = end - start;
1902         }
1903
1904         if (bytes_left) {
1905                 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
1906                                            GFP_NOFS, 0);
1907                 if (!ret)
1908                         *discarded_bytes += bytes_left;
1909         }
1910         return ret;
1911 }
1912
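/*
 * Editor-added worked example for the superblock skipping above: the btrfs
 * superblock mirrors sit at 64KiB, 64MiB and 256GiB, each
 * BTRFS_SUPER_INFO_SIZE (4KiB) long.  Discarding [60MiB, 70MiB) overlaps
 * the 64MiB copy, so the loop first discards the 4MiB chunk
 * [60MiB, 64MiB) (the 'size' chunk), restarts at sb_end = 64MiB + 4KiB,
 * and the final blkdev_issue_discard() then covers [64MiB + 4KiB, 70MiB).
 */
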
1913 int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
1914                          u64 num_bytes, u64 *actual_bytes)
1915 {
1916         int ret;
1917         u64 discarded_bytes = 0;
1918         struct btrfs_bio *bbio = NULL;
1919
1921         /*
1922          * Avoid races with device replace and make sure our bbio has devices
1923          * associated to its stripes that don't go away while we are discarding.
1924          */
1925         btrfs_bio_counter_inc_blocked(fs_info);
1926         /* Tell the block device(s) that the sectors can be discarded */
1927         ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes,
1928                               &bbio, 0);
1929         /* Error condition is -ENOMEM */
1930         if (!ret) {
1931                 struct btrfs_bio_stripe *stripe = bbio->stripes;
1932                 int i;
1933
1935                 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
1936                         u64 bytes;
1937                         struct request_queue *req_q;
1938
1939                         if (!stripe->dev->bdev) {
1940                                 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
1941                                 continue;
1942                         }
1943                         req_q = bdev_get_queue(stripe->dev->bdev);
1944                         if (!blk_queue_discard(req_q))
1945                                 continue;
1946
1947                         ret = btrfs_issue_discard(stripe->dev->bdev,
1948                                                   stripe->physical,
1949                                                   stripe->length,
1950                                                   &bytes);
1951                         if (!ret)
1952                                 discarded_bytes += bytes;
1953                         else if (ret != -EOPNOTSUPP)
1954                                 /*
                                      * Logic errors or -ENOMEM, or -EIO, but
                                      * I don't know how that could happen JDM
                                      */
                                     break;
1955
1956                         /*
1957                          * Just in case we get back EOPNOTSUPP for some reason,
1958                          * just ignore the return value so we don't screw up
1959                          * people calling discard_extent.
1960                          */
1961                         ret = 0;
1962                 }
1963                 btrfs_put_bbio(bbio);
1964         }
1965         btrfs_bio_counter_dec(fs_info);
1966
1967         if (actual_bytes)
1968                 *actual_bytes = discarded_bytes;
1969
1971         if (ret == -EOPNOTSUPP)
1972                 ret = 0;
1973         return ret;
1974 }
1975
1976 /* Can return -ENOMEM */
1977 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1978                          struct btrfs_ref *generic_ref)
1979 {
1980         struct btrfs_fs_info *fs_info = trans->fs_info;
1981         int old_ref_mod, new_ref_mod;
1982         int ret;
1983
1984         ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
1985                generic_ref->action);
1986         BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
1987                generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
1988
1989         if (generic_ref->type == BTRFS_REF_METADATA)
1990                 ret = btrfs_add_delayed_tree_ref(trans, generic_ref,
1991                                 NULL, &old_ref_mod, &new_ref_mod);
1992         else
1993                 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0,
1994                                                  &old_ref_mod, &new_ref_mod);
1995
1996         btrfs_ref_tree_mod(fs_info, generic_ref);
1997
1998         if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
1999                 sub_pinned_bytes(fs_info, generic_ref);
2000
2001         return ret;
2002 }
2003
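/*
 * Editor-added usage sketch, modeled on in-tree callers of this era (the
 * btrfs_init_*_ref() helpers live in delayed-ref.h; setup of optional
 * fields such as real_root is elided):
 *
 *	struct btrfs_ref generic_ref = { 0 };
 *
 *	btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_REF,
 *			       bytenr, num_bytes, parent);
 *	btrfs_init_data_ref(&generic_ref, ref_root, ino, file_offset);
 *	ret = btrfs_inc_extent_ref(trans, &generic_ref);
 *
 * Note that this only queues a delayed ref; the extent tree itself is not
 * modified until the delayed refs are run.
 */
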
2004 /*
2005  * __btrfs_inc_extent_ref - insert backreference for a given extent
2006  *
2007  * @trans:          Handle of transaction
2008  *
2009  * @node:           The delayed ref node used to get the bytenr/length for
2010  *                  the extent whose references are incremented.
2011  *
2012  * @parent:         If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
2013  *                  BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
2014  *                  bytenr of the parent block. Since new extents are always
2015  *                  created with indirect references, this will only be the case
2016  *                  when relocating a shared extent. In that case, root_objectid
2017  *                  will be BTRFS_TREE_RELOC_OBJECTID. Otherwise, parent must
2018  *                  be 0
2019  *
2020  * @root_objectid:  The id of the root where this modification has originated,
2021  *                  this can be either one of the well-known metadata trees or
2022  *                  the subvolume id which references this extent.
2023  *
2024  * @owner:          For data extents it is the inode number of the owning file.
2025  *                  For metadata extents this parameter holds the level in the
2026  *                  tree of the extent.
2027  *
2028  * @offset:         For metadata extents the offset is ignored and is currently
2029  *                  always passed as 0. For data extents it is the fileoffset
2030  *                  always passed as 0. For data extents it is the file offset
2031  *
2032  * @refs_to_add:    Number of references to add
2033  *
2034  * @extent_op:      Pointer to a structure holding information necessary when
2035  *                  updating a tree block's flags
2036  *
2037  */
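/*
 * Editor-added concrete example for the parameters above: a data extent
 * referenced by inode 257 of subvolume 5 at file offset 0 would use
 * parent = 0, root_objectid = 5, owner = 257, offset = 0.  A level-1 tree
 * block of the same root would instead use owner = 1 and offset = 0.
 */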
2038 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2039                                   struct btrfs_delayed_ref_node *node,
2040                                   u64 parent, u64 root_objectid,
2041                                   u64 owner, u64 offset, int refs_to_add,
2042                                   struct btrfs_delayed_extent_op *extent_op)
2043 {
2044         struct btrfs_path *path;
2045         struct extent_buffer *leaf;
2046         struct btrfs_extent_item *item;
2047         struct btrfs_key key;
2048         u64 bytenr = node->bytenr;
2049         u64 num_bytes = node->num_bytes;
2050         u64 refs;
2051         int ret;
2052
2053         path = btrfs_alloc_path();
2054         if (!path)
2055                 return -ENOMEM;
2056
2057         path->reada = READA_FORWARD;
2058         path->leave_spinning = 1;
2059         /* this will set up the path even if it fails to insert the back ref */
2060         ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
2061                                            parent, root_objectid, owner,
2062                                            offset, refs_to_add, extent_op);
2063         if ((ret < 0 && ret != -EAGAIN) || !ret)
2064                 goto out;
2065
2066         /*
2067          * Ok we had -EAGAIN which means we didn't have space to insert an
2068          * inline extent ref, so just update the reference count and add a
2069          * normal backref.
2070          */
2071         leaf = path->nodes[0];
2072         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2073         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2074         refs = btrfs_extent_refs(leaf, item);
2075         btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
2076         if (extent_op)
2077                 __run_delayed_extent_op(extent_op, leaf, item);
2078
2079         btrfs_mark_buffer_dirty(leaf);
2080         btrfs_release_path(path);
2081
2082         path->reada = READA_FORWARD;
2083         path->leave_spinning = 1;
2084         /* now insert the actual backref */
2085         ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
2086                                     owner, offset, refs_to_add);
2087         if (ret)
2088                 btrfs_abort_transaction(trans, ret);
2089 out:
2090         btrfs_free_path(path);
2091         return ret;
2092 }
2093
2094 static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2095                                 struct btrfs_delayed_ref_node *node,
2096                                 struct btrfs_delayed_extent_op *extent_op,
2097                                 int insert_reserved)
2098 {
2099         int ret = 0;
2100         struct btrfs_delayed_data_ref *ref;
2101         struct btrfs_key ins;
2102         u64 parent = 0;
2103         u64 ref_root = 0;
2104         u64 flags = 0;
2105
2106         ins.objectid = node->bytenr;
2107         ins.offset = node->num_bytes;
2108         ins.type = BTRFS_EXTENT_ITEM_KEY;
2109
2110         ref = btrfs_delayed_node_to_data_ref(node);
2111         trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
2112
2113         if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2114                 parent = ref->parent;
2115         ref_root = ref->root;
2116
2117         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2118                 if (extent_op)
2119                         flags |= extent_op->flags_to_set;
2120                 ret = alloc_reserved_file_extent(trans, parent, ref_root,
2121                                                  flags, ref->objectid,
2122                                                  ref->offset, &ins,
2123                                                  node->ref_mod);
2124         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2125                 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
2126                                              ref->objectid, ref->offset,
2127                                              node->ref_mod, extent_op);
2128         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2129                 ret = __btrfs_free_extent(trans, node, parent,
2130                                           ref_root, ref->objectid,
2131                                           ref->offset, node->ref_mod,
2132                                           extent_op);
2133         } else {
2134                 BUG();
2135         }
2136         return ret;
2137 }
2138
2139 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
2140                                     struct extent_buffer *leaf,
2141                                     struct btrfs_extent_item *ei)
2142 {
2143         u64 flags = btrfs_extent_flags(leaf, ei);
2144         if (extent_op->update_flags) {
2145                 flags |= extent_op->flags_to_set;
2146                 btrfs_set_extent_flags(leaf, ei, flags);
2147         }
2148
2149         if (extent_op->update_key) {
2150                 struct btrfs_tree_block_info *bi;
2151                 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2152                 bi = (struct btrfs_tree_block_info *)(ei + 1);
2153                 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
2154         }
2155 }
2156
2157 static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2158                                  struct btrfs_delayed_ref_head *head,
2159                                  struct btrfs_delayed_extent_op *extent_op)
2160 {
2161         struct btrfs_fs_info *fs_info = trans->fs_info;
2162         struct btrfs_key key;
2163         struct btrfs_path *path;
2164         struct btrfs_extent_item *ei;
2165         struct extent_buffer *leaf;
2166         u32 item_size;
2167         int ret;
2168         int err = 0;
2169         int metadata = !extent_op->is_data;
2170
2171         if (trans->aborted)
2172                 return 0;
2173
2174         if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2175                 metadata = 0;
2176
2177         path = btrfs_alloc_path();
2178         if (!path)
2179                 return -ENOMEM;
2180
2181         key.objectid = head->bytenr;
2182
2183         if (metadata) {
2184                 key.type = BTRFS_METADATA_ITEM_KEY;
2185                 key.offset = extent_op->level;
2186         } else {
2187                 key.type = BTRFS_EXTENT_ITEM_KEY;
2188                 key.offset = head->num_bytes;
2189         }
2190
2191 again:
2192         path->reada = READA_FORWARD;
2193         path->leave_spinning = 1;
2194         ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
2195         if (ret < 0) {
2196                 err = ret;
2197                 goto out;
2198         }
2199         if (ret > 0) {
2200                 if (metadata) {
2201                         if (path->slots[0] > 0) {
2202                                 path->slots[0]--;
2203                                 btrfs_item_key_to_cpu(path->nodes[0], &key,
2204                                                       path->slots[0]);
2205                                 if (key.objectid == head->bytenr &&
2206                                     key.type == BTRFS_EXTENT_ITEM_KEY &&
2207                                     key.offset == head->num_bytes)
2208                                         ret = 0;
2209                         }
2210                         if (ret > 0) {
2211                                 btrfs_release_path(path);
2212                                 metadata = 0;
2213
2214                                 key.objectid = head->bytenr;
2215                                 key.offset = head->num_bytes;
2216                                 key.type = BTRFS_EXTENT_ITEM_KEY;
2217                                 goto again;
2218                         }
2219                 } else {
2220                         err = -EIO;
2221                         goto out;
2222                 }
2223         }
2224
2225         leaf = path->nodes[0];
2226         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2227
2228         if (unlikely(item_size < sizeof(*ei))) {
2229                 err = -EINVAL;
2230                 btrfs_print_v0_err(fs_info);
2231                 btrfs_abort_transaction(trans, err);
2232                 goto out;
2233         }
2234
2235         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2236         __run_delayed_extent_op(extent_op, leaf, ei);
2237
2238         btrfs_mark_buffer_dirty(leaf);
2239 out:
2240         btrfs_free_path(path);
2241         return err;
2242 }
2243
2244 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2245                                 struct btrfs_delayed_ref_node *node,
2246                                 struct btrfs_delayed_extent_op *extent_op,
2247                                 int insert_reserved)
2248 {
2249         int ret = 0;
2250         struct btrfs_delayed_tree_ref *ref;
2251         u64 parent = 0;
2252         u64 ref_root = 0;
2253
2254         ref = btrfs_delayed_node_to_tree_ref(node);
2255         trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
2256
2257         if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2258                 parent = ref->parent;
2259         ref_root = ref->root;
2260
2261         if (node->ref_mod != 1) {
2262                 btrfs_err(trans->fs_info,
2263         "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
2264                           node->bytenr, node->ref_mod, node->action, ref_root,
2265                           parent);
2266                 return -EIO;
2267         }
2268         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2269                 BUG_ON(!extent_op || !extent_op->update_flags);
2270                 ret = alloc_reserved_tree_block(trans, node, extent_op);
2271         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2272                 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
2273                                              ref->level, 0, 1, extent_op);
2274         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2275                 ret = __btrfs_free_extent(trans, node, parent, ref_root,
2276                                           ref->level, 0, 1, extent_op);
2277         } else {
2278                 BUG();
2279         }
2280         return ret;
2281 }
2282
2283 /* helper function to actually process a single delayed ref entry */
2284 static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2285                                struct btrfs_delayed_ref_node *node,
2286                                struct btrfs_delayed_extent_op *extent_op,
2287                                int insert_reserved)
2288 {
2289         int ret = 0;
2290
2291         if (trans->aborted) {
2292                 if (insert_reserved)
2293                         btrfs_pin_extent(trans->fs_info, node->bytenr,
2294                                          node->num_bytes, 1);
2295                 return 0;
2296         }
2297
2298         if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2299             node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2300                 ret = run_delayed_tree_ref(trans, node, extent_op,
2301                                            insert_reserved);
2302         else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2303                  node->type == BTRFS_SHARED_DATA_REF_KEY)
2304                 ret = run_delayed_data_ref(trans, node, extent_op,
2305                                            insert_reserved);
2306         else
2307                 BUG();
2308         if (ret && insert_reserved)
2309                 btrfs_pin_extent(trans->fs_info, node->bytenr,
2310                                  node->num_bytes, 1);
2311         return ret;
2312 }
2313
2314 static inline struct btrfs_delayed_ref_node *
2315 select_delayed_ref(struct btrfs_delayed_ref_head *head)
2316 {
2317         struct btrfs_delayed_ref_node *ref;
2318
2319         if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
2320                 return NULL;
2321
2322         /*
2323          * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
2324          * This is to prevent a ref count from going down to zero, which deletes
2325          * the extent item from the extent tree, when there still are references
2326          * to add, which would fail because they would not find the extent item.
2327          */
2328         if (!list_empty(&head->ref_add_list))
2329                 return list_first_entry(&head->ref_add_list,
2330                                 struct btrfs_delayed_ref_node, add_list);
2331
2332         ref = rb_entry(rb_first_cached(&head->ref_tree),
2333                        struct btrfs_delayed_ref_node, ref_node);
2334         ASSERT(list_empty(&ref->add_list));
2335         return ref;
2336 }
2337
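/*
 * Editor-added example of the hazard described above: a head carrying
 * { DROP(1), ADD(1) } for an extent whose item currently has refs == 1.
 * Running the drop first would delete the extent item, and the add would
 * then fail to find it; running the add first leaves refs == 2, and the
 * drop safely brings it back to 1.
 */
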
2338 static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
2339                                       struct btrfs_delayed_ref_head *head)
2340 {
2341         spin_lock(&delayed_refs->lock);
2342         head->processing = 0;
2343         delayed_refs->num_heads_ready++;
2344         spin_unlock(&delayed_refs->lock);
2345         btrfs_delayed_ref_unlock(head);
2346 }
2347
2348 static struct btrfs_delayed_extent_op *cleanup_extent_op(
2349                                 struct btrfs_delayed_ref_head *head)
2350 {
2351         struct btrfs_delayed_extent_op *extent_op = head->extent_op;
2352
2353         if (!extent_op)
2354                 return NULL;
2355
2356         if (head->must_insert_reserved) {
2357                 head->extent_op = NULL;
2358                 btrfs_free_delayed_extent_op(extent_op);
2359                 return NULL;
2360         }
2361         return extent_op;
2362 }
2363
2364 static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
2365                                      struct btrfs_delayed_ref_head *head)
2366 {
2367         struct btrfs_delayed_extent_op *extent_op;
2368         int ret;
2369
2370         extent_op = cleanup_extent_op(head);
2371         if (!extent_op)
2372                 return 0;
2373         head->extent_op = NULL;
2374         spin_unlock(&head->lock);
2375         ret = run_delayed_extent_op(trans, head, extent_op);
2376         btrfs_free_delayed_extent_op(extent_op);
2377         return ret ? ret : 1;
2378 }
2379
2380 void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
2381                                   struct btrfs_delayed_ref_root *delayed_refs,
2382                                   struct btrfs_delayed_ref_head *head)
2383 {
2384         int nr_items = 1;       /* Dropping this ref head update. */
2385
2386         if (head->total_ref_mod < 0) {
2387                 struct btrfs_space_info *space_info;
2388                 u64 flags;
2389
2390                 if (head->is_data)
2391                         flags = BTRFS_BLOCK_GROUP_DATA;
2392                 else if (head->is_system)
2393                         flags = BTRFS_BLOCK_GROUP_SYSTEM;
2394                 else
2395                         flags = BTRFS_BLOCK_GROUP_METADATA;
2396                 space_info = btrfs_find_space_info(fs_info, flags);
2397                 ASSERT(space_info);
2398                 percpu_counter_add_batch(&space_info->total_bytes_pinned,
2399                                    -head->num_bytes,
2400                                    BTRFS_TOTAL_BYTES_PINNED_BATCH);
2401
2402                 /*
2403                  * We had csum deletions accounted for in our delayed refs rsv,
2404                  * we need to drop the csum leaves for this update from our
2405                  * delayed_refs_rsv.
2406                  */
2407                 if (head->is_data) {
2408                         spin_lock(&delayed_refs->lock);
2409                         delayed_refs->pending_csums -= head->num_bytes;
2410                         spin_unlock(&delayed_refs->lock);
2411                         nr_items += btrfs_csum_bytes_to_leaves(fs_info,
2412                                 head->num_bytes);
2413                 }
2414         }
2415
2416         btrfs_delayed_refs_rsv_release(fs_info, nr_items);
2417 }
2418
2419 static int cleanup_ref_head(struct btrfs_trans_handle *trans,
2420                             struct btrfs_delayed_ref_head *head)
2421 {
2423         struct btrfs_fs_info *fs_info = trans->fs_info;
2424         struct btrfs_delayed_ref_root *delayed_refs;
2425         int ret;
2426
2427         delayed_refs = &trans->transaction->delayed_refs;
2428
2429         ret = run_and_cleanup_extent_op(trans, head);
2430         if (ret < 0) {
2431                 unselect_delayed_ref_head(delayed_refs, head);
2432                 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2433                 return ret;
2434         } else if (ret) {
2435                 return ret;
2436         }
2437
2438         /*
2439          * Need to drop our head ref lock and re-acquire the delayed ref lock
2440          * and then re-check to make sure nobody got added.
2441          */
2442         spin_unlock(&head->lock);
2443         spin_lock(&delayed_refs->lock);
2444         spin_lock(&head->lock);
2445         if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
2446                 spin_unlock(&head->lock);
2447                 spin_unlock(&delayed_refs->lock);
2448                 return 1;
2449         }
2450         btrfs_delete_ref_head(delayed_refs, head);
2451         spin_unlock(&head->lock);
2452         spin_unlock(&delayed_refs->lock);
2453
2454         if (head->must_insert_reserved) {
2455                 btrfs_pin_extent(fs_info, head->bytenr,
2456                                  head->num_bytes, 1);
2457                 if (head->is_data) {
2458                         ret = btrfs_del_csums(trans, fs_info, head->bytenr,
2459                                               head->num_bytes);
2460                 }
2461         }
2462
2463         btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
2464
2465         trace_run_delayed_ref_head(fs_info, head, 0);
2466         btrfs_delayed_ref_unlock(head);
2467         btrfs_put_delayed_ref_head(head);
2468         return 0;
2469 }
2470
2471 static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
2472                                         struct btrfs_trans_handle *trans)
2473 {
2474         struct btrfs_delayed_ref_root *delayed_refs =
2475                 &trans->transaction->delayed_refs;
2476         struct btrfs_delayed_ref_head *head = NULL;
2477         int ret;
2478
2479         spin_lock(&delayed_refs->lock);
2480         head = btrfs_select_ref_head(delayed_refs);
2481         if (!head) {
2482                 spin_unlock(&delayed_refs->lock);
2483                 return head;
2484         }
2485
2486         /*
2487          * Grab the lock that says we are going to process all the refs for
2488          * this head
2489          */
2490         ret = btrfs_delayed_ref_lock(delayed_refs, head);
2491         spin_unlock(&delayed_refs->lock);
2492
2493         /*
2494          * We may have dropped the spin lock to get the head mutex lock, and
2495          * that might have given someone else time to free the head.  If that's
2496          * true, it has been removed from our list and we can move on.
2497          */
2498         if (ret == -EAGAIN)
2499                 head = ERR_PTR(-EAGAIN);
2500
2501         return head;
2502 }
2503
2504 static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
2505                                     struct btrfs_delayed_ref_head *locked_ref,
2506                                     unsigned long *run_refs)
2507 {
2508         struct btrfs_fs_info *fs_info = trans->fs_info;
2509         struct btrfs_delayed_ref_root *delayed_refs;
2510         struct btrfs_delayed_extent_op *extent_op;
2511         struct btrfs_delayed_ref_node *ref;
2512         int must_insert_reserved = 0;
2513         int ret;
2514
2515         delayed_refs = &trans->transaction->delayed_refs;
2516
2517         lockdep_assert_held(&locked_ref->mutex);
2518         lockdep_assert_held(&locked_ref->lock);
2519
2520         while ((ref = select_delayed_ref(locked_ref))) {
2521                 if (ref->seq &&
2522                     btrfs_check_delayed_seq(fs_info, ref->seq)) {
2523                         spin_unlock(&locked_ref->lock);
2524                         unselect_delayed_ref_head(delayed_refs, locked_ref);
2525                         return -EAGAIN;
2526                 }
2527
2528                 (*run_refs)++;
2529                 ref->in_tree = 0;
2530                 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
2531                 RB_CLEAR_NODE(&ref->ref_node);
2532                 if (!list_empty(&ref->add_list))
2533                         list_del(&ref->add_list);
2534                 /*
2535                  * When we play the delayed ref, also correct the ref_mod on
2536                  * the head
2537                  */
2538                 switch (ref->action) {
2539                 case BTRFS_ADD_DELAYED_REF:
2540                 case BTRFS_ADD_DELAYED_EXTENT:
2541                         locked_ref->ref_mod -= ref->ref_mod;
2542                         break;
2543                 case BTRFS_DROP_DELAYED_REF:
2544                         locked_ref->ref_mod += ref->ref_mod;
2545                         break;
2546                 default:
2547                         WARN_ON(1);
2548                 }
2549                 atomic_dec(&delayed_refs->num_entries);
2550
2551                 /*
2552                  * Record the must_insert_reserved flag before we drop the
2553                  * spin lock.
2554                  */
2555                 must_insert_reserved = locked_ref->must_insert_reserved;
2556                 locked_ref->must_insert_reserved = 0;
2557
2558                 extent_op = locked_ref->extent_op;
2559                 locked_ref->extent_op = NULL;
2560                 spin_unlock(&locked_ref->lock);
2561
2562                 ret = run_one_delayed_ref(trans, ref, extent_op,
2563                                           must_insert_reserved);
2564
2565                 btrfs_free_delayed_extent_op(extent_op);
2566                 if (ret) {
2567                         unselect_delayed_ref_head(delayed_refs, locked_ref);
2568                         btrfs_put_delayed_ref(ref);
2569                         btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
2570                                     ret);
2571                         return ret;
2572                 }
2573
2574                 btrfs_put_delayed_ref(ref);
2575                 cond_resched();
2576
2577                 spin_lock(&locked_ref->lock);
2578                 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2579         }
2580
2581         return 0;
2582 }
2583
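/*
 * Editor-added worked example of the ref_mod correction above: a head with
 * ref_mod == 1 carrying ADD(2) and DROP(1) nodes.  Playing the ADD
 * subtracts 2 (head ref_mod becomes -1), playing the DROP adds 1 back
 * (head ref_mod becomes 0), so the head's ref_mod reaches 0 exactly when
 * every queued ref has been applied.
 */
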
2584 /*
2585  * Returns 0 on success or if called with an already aborted transaction.
2586  * Returns -ENOMEM or -EIO on failure and will abort the transaction.
2587  */
2588 static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2589                                              unsigned long nr)
2590 {
2591         struct btrfs_fs_info *fs_info = trans->fs_info;
2592         struct btrfs_delayed_ref_root *delayed_refs;
2593         struct btrfs_delayed_ref_head *locked_ref = NULL;
2594         ktime_t start = ktime_get();
2595         int ret;
2596         unsigned long count = 0;
2597         unsigned long actual_count = 0;
2598
2599         delayed_refs = &trans->transaction->delayed_refs;
2600         do {
2601                 if (!locked_ref) {
2602                         locked_ref = btrfs_obtain_ref_head(trans);
2603                         if (IS_ERR_OR_NULL(locked_ref)) {
2604                                 if (PTR_ERR(locked_ref) == -EAGAIN) {
2605                                         continue;
2606                                 } else {
2607                                         break;
2608                                 }
2609                         }
2610                         count++;
2611                 }
2612                 /*
2613                  * We need to try to merge add/drops of the same ref since we
2614                  * can run into issues with relocate dropping the implicit ref
2615                  * and then it being added back again before the drop can
2616                  * finish.  If we merged anything we need to re-loop so we can
2617                  * get a good ref.
2618                  * Or we can get node references of the same type that weren't
2619                  * merged when created due to bumps in the tree mod seq, and
2620                  * we need to merge them to prevent adding an inline extent
2621                  * backref before dropping it (triggering a BUG_ON at
2622                  * insert_inline_extent_backref()).
2623                  */
2624                 spin_lock(&locked_ref->lock);
2625                 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2626
2627                 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
2628                                                       &actual_count);
2629                 if (ret < 0 && ret != -EAGAIN) {
2630                         /*
2631                          * Error, btrfs_run_delayed_refs_for_head already
2632                          * unlocked everything so just bail out
2633                          */
2634                         return ret;
2635                 } else if (!ret) {
2636                         /*
2637                          * Success, perform the usual cleanup of a processed
2638                          * head
2639                          */
2640                         ret = cleanup_ref_head(trans, locked_ref);
2641                         if (ret > 0) {
2642                                 /* We dropped our lock, we need to loop. */
2643                                 ret = 0;
2644                                 continue;
2645                         } else if (ret) {
2646                                 return ret;
2647                         }
2648                 }
2649
2650                 /*
2651                  * Either success case or btrfs_run_delayed_refs_for_head
2652                  * returned -EAGAIN, meaning we need to select another head
2653                  */
2654
2655                 locked_ref = NULL;
2656                 cond_resched();
2657         } while ((nr != -1 && count < nr) || locked_ref);
2658
2659         /*
2660          * We don't want to include ref heads, since we can have empty ref
2661          * heads and those would drastically skew our runtime down, as we
2662          * just do accounting for them, no actual extent tree updates.
2663          */
2664         if (actual_count > 0) {
2665                 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2666                 u64 avg;
2667
2668                 /*
2669                  * We weigh the current average higher than our current runtime
2670                  * to avoid large swings in the average.
2671                  */
2672                 spin_lock(&delayed_refs->lock);
2673                 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2674                 fs_info->avg_delayed_ref_runtime = avg >> 2;    /* div by 4 */
2675                 spin_unlock(&delayed_refs->lock);
2676         }
2677         return 0;
2678 }
2679
2680 #ifdef SCRAMBLE_DELAYED_REFS
2681 /*
2682  * Normally delayed refs get processed in ascending bytenr order. This
2683  * correlates in most cases to the order added. To expose dependencies on this
2684  * order, we start to process the tree in the middle instead of the beginning.
2685  */
2686 static u64 find_middle(struct rb_root *root)
2687 {
2688         struct rb_node *n = root->rb_node;
2689         struct btrfs_delayed_ref_node *entry;
2690         int alt = 1;
2691         u64 middle;
2692         u64 first = 0, last = 0;
2693
2694         n = rb_first(root);
2695         if (n) {
2696                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2697                 first = entry->bytenr;
2698         }
2699         n = rb_last(root);
2700         if (n) {
2701                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2702                 last = entry->bytenr;
2703         }
2704         n = root->rb_node;
2705
2706         while (n) {
2707                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2708                 WARN_ON(!entry->in_tree);
2709
2710                 middle = entry->bytenr;
2711
2712                 if (alt)
2713                         n = n->rb_left;
2714                 else
2715                         n = n->rb_right;
2716
2717                 alt = 1 - alt;
2718         }
2719         return middle;
2720 }
2721 #endif
2722
2723 static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
2724 {
2725         u64 num_bytes;
2726
2727         num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2728                              sizeof(struct btrfs_extent_inline_ref));
2729         if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2730                 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2731
2732         /*
2733          * Leaves are never filled all the way, so treat the result as a
2734          * rough estimate of the leaves these updates will actually need.
2735          */
2736         return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
2737 }
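
/*
 * Editor's note with illustrative numbers (struct sizes are approximate
 * and depend on the on-disk format): with skinny metadata each head is
 * charged sizeof(struct btrfs_extent_item) +
 * sizeof(struct btrfs_extent_inline_ref), roughly 33 bytes, so a ~16KiB
 * leaf data area accounts for close to 500 heads per estimated leaf;
 * non-skinny filesystems add sizeof(struct btrfs_tree_block_info) per
 * head.
 */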
2738
2739 /*
2740  * Takes the number of bytes to be checksummed and figures out how many leaves it
2741  * would require to store the csums for that many bytes.
2742  */
2743 u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2744 {
2745         u64 csum_size;
2746         u64 num_csums_per_leaf;
2747         u64 num_csums;
2748
2749         csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
2750         num_csums_per_leaf = div64_u64(csum_size,
2751                         (u64)btrfs_super_csum_size(fs_info->super_copy));
2752         num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
2753         num_csums += num_csums_per_leaf - 1;
2754         num_csums = div64_u64(num_csums, num_csums_per_leaf);
2755         return num_csums;
2756 }
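
/*
 * Editor's worked example with assumed parameters (4KiB sectorsize,
 * 4-byte crc32c csums, ~16KiB max item size): num_csums_per_leaf comes
 * to about 4000, and 64MiB of data needs 64MiB / 4KiB = 16384 csums,
 * i.e. about 5 leaves.  The "+ num_csums_per_leaf - 1" above makes the
 * final division round up.
 */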
2757
2758 /*
2759  * This starts processing the delayed reference count updates and
2760  * extent insertions we have queued up so far.  count can be
2761  * 0, which means to process everything in the tree at the start
2762  * of the run (but not newly added entries), or it can be some target
2763  * number you'd like to process.
2764  *
2765  * Returns 0 on success or if called with an aborted transaction
2766  * Returns <0 on error and aborts the transaction
2767  */
2768 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2769                            unsigned long count)
2770 {
2771         struct btrfs_fs_info *fs_info = trans->fs_info;
2772         struct rb_node *node;
2773         struct btrfs_delayed_ref_root *delayed_refs;
2774         struct btrfs_delayed_ref_head *head;
2775         int ret;
2776         int run_all = count == (unsigned long)-1;
2777
2778         /* We'll clean this up in btrfs_cleanup_transaction */
2779         if (trans->aborted)
2780                 return 0;
2781
2782         if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
2783                 return 0;
2784
2785         delayed_refs = &trans->transaction->delayed_refs;
2786         if (count == 0)
2787                 count = atomic_read(&delayed_refs->num_entries) * 2;
2788
2789 again:
2790 #ifdef SCRAMBLE_DELAYED_REFS
2791         delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2792 #endif
2793         ret = __btrfs_run_delayed_refs(trans, count);
2794         if (ret < 0) {
2795                 btrfs_abort_transaction(trans, ret);
2796                 return ret;
2797         }
2798
2799         if (run_all) {
2800                 btrfs_create_pending_block_groups(trans);
2801
2802                 spin_lock(&delayed_refs->lock);
2803                 node = rb_first_cached(&delayed_refs->href_root);
2804                 if (!node) {
2805                         spin_unlock(&delayed_refs->lock);
2806                         goto out;
2807                 }
2808                 head = rb_entry(node, struct btrfs_delayed_ref_head,
2809                                 href_node);
2810                 refcount_inc(&head->refs);
2811                 spin_unlock(&delayed_refs->lock);
2812
2813                 /* Mutex was contended, block until it's released and retry. */
2814                 mutex_lock(&head->mutex);
2815                 mutex_unlock(&head->mutex);
2816
2817                 btrfs_put_delayed_ref_head(head);
2818                 cond_resched();
2819                 goto again;
2820         }
2821 out:
2822         return 0;
2823 }
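
/*
 * Editor's hypothetical caller sketch, not upstream code: pass 0 to
 * process everything queued when the run starts, (unsigned long)-1 to
 * keep looping until no ref heads remain, or a positive value to bound
 * the work done in one call.
 */
static int example_flush_some_refs(struct btrfs_trans_handle *trans)
{
	/* Process roughly 64 delayed refs, leaving the rest queued. */
	return btrfs_run_delayed_refs(trans, 64);
}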
2824
2825 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2826                                 u64 bytenr, u64 num_bytes, u64 flags,
2827                                 int level, int is_data)
2828 {
2829         struct btrfs_delayed_extent_op *extent_op;
2830         int ret;
2831
2832         extent_op = btrfs_alloc_delayed_extent_op();
2833         if (!extent_op)
2834                 return -ENOMEM;
2835
2836         extent_op->flags_to_set = flags;
2837         extent_op->update_flags = true;
2838         extent_op->update_key = false;
2839         extent_op->is_data = !!is_data;
2840         extent_op->level = level;
2841
2842         ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
2843         if (ret)
2844                 btrfs_free_delayed_extent_op(extent_op);
2845         return ret;
2846 }
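
/*
 * Editor's hypothetical usage sketch: queue a delayed op that sets
 * BTRFS_BLOCK_FLAG_FULL_BACKREF on a metadata block.  The flag update is
 * applied when the delayed ref head for the extent is run, not
 * immediately.
 */
static int example_mark_full_backref(struct btrfs_trans_handle *trans,
				     struct extent_buffer *buf, int level)
{
	return btrfs_set_disk_extent_flags(trans, buf->start, buf->len,
					   BTRFS_BLOCK_FLAG_FULL_BACKREF,
					   level, 0);
}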
2847
2848 static noinline int check_delayed_ref(struct btrfs_root *root,
2849                                       struct btrfs_path *path,
2850                                       u64 objectid, u64 offset, u64 bytenr)
2851 {
2852         struct btrfs_delayed_ref_head *head;
2853         struct btrfs_delayed_ref_node *ref;
2854         struct btrfs_delayed_data_ref *data_ref;
2855         struct btrfs_delayed_ref_root *delayed_refs;
2856         struct btrfs_transaction *cur_trans;
2857         struct rb_node *node;
2858         int ret = 0;
2859
2860         spin_lock(&root->fs_info->trans_lock);
2861         cur_trans = root->fs_info->running_transaction;
2862         if (cur_trans)
2863                 refcount_inc(&cur_trans->use_count);
2864         spin_unlock(&root->fs_info->trans_lock);
2865         if (!cur_trans)
2866                 return 0;
2867
2868         delayed_refs = &cur_trans->delayed_refs;
2869         spin_lock(&delayed_refs->lock);
2870         head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
2871         if (!head) {
2872                 spin_unlock(&delayed_refs->lock);
2873                 btrfs_put_transaction(cur_trans);
2874                 return 0;
2875         }
2876
2877         if (!mutex_trylock(&head->mutex)) {
2878                 refcount_inc(&head->refs);
2879                 spin_unlock(&delayed_refs->lock);
2880
2881                 btrfs_release_path(path);
2882
2883                 /*
2884                  * Mutex was contended, block until it's released and let
2885                  * caller try again
2886                  */
2887                 mutex_lock(&head->mutex);
2888                 mutex_unlock(&head->mutex);
2889                 btrfs_put_delayed_ref_head(head);
2890                 btrfs_put_transaction(cur_trans);
2891                 return -EAGAIN;
2892         }
2893         spin_unlock(&delayed_refs->lock);
2894
2895         spin_lock(&head->lock);
2896         /*
2897          * XXX: We should replace this with a proper search function in the
2898          * future.
2899          */
2900         for (node = rb_first_cached(&head->ref_tree); node;
2901              node = rb_next(node)) {
2902                 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2903                 /* If it's a shared ref we know a cross reference exists */
2904                 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2905                         ret = 1;
2906                         break;
2907                 }
2908
2909                 data_ref = btrfs_delayed_node_to_data_ref(ref);
2910
2911                 /*
2912                  * If our ref doesn't match the one we're currently looking at
2913                  * then we have a cross reference.
2914                  */
2915                 if (data_ref->root != root->root_key.objectid ||
2916                     data_ref->objectid != objectid ||
2917                     data_ref->offset != offset) {
2918                         ret = 1;
2919                         break;
2920                 }
2921         }
2922         spin_unlock(&head->lock);
2923         mutex_unlock(&head->mutex);
2924         btrfs_put_transaction(cur_trans);
2925         return ret;
2926 }
2927
2928 static noinline int check_committed_ref(struct btrfs_root *root,
2929                                         struct btrfs_path *path,
2930                                         u64 objectid, u64 offset, u64 bytenr)
2931 {
2932         struct btrfs_fs_info *fs_info = root->fs_info;
2933         struct btrfs_root *extent_root = fs_info->extent_root;
2934         struct extent_buffer *leaf;
2935         struct btrfs_extent_data_ref *ref;
2936         struct btrfs_extent_inline_ref *iref;
2937         struct btrfs_extent_item *ei;
2938         struct btrfs_key key;
2939         u32 item_size;
2940         int type;
2941         int ret;
2942
2943         key.objectid = bytenr;
2944         key.offset = (u64)-1;
2945         key.type = BTRFS_EXTENT_ITEM_KEY;
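        /*
         * Editor's note: searching with offset (u64)-1 positions the path
         * just past any item for this bytenr; the slot is stepped back
         * below so the extent item itself, if present, is examined.
         */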
2946
2947         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2948         if (ret < 0)
2949                 goto out;
2950         BUG_ON(ret == 0); /* Corruption */
2951
2952         ret = -ENOENT;
2953         if (path->slots[0] == 0)
2954                 goto out;
2955
2956         path->slots[0]--;
2957         leaf = path->nodes[0];
2958         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2959
2960         if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2961                 goto out;
2962
2963         ret = 1;
2964         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2965         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2966
2967         if (item_size != sizeof(*ei) +
2968             btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2969                 goto out;
2970
2971         if (btrfs_extent_generation(leaf, ei) <=
2972             btrfs_root_last_snapshot(&root->root_item))
2973                 goto out;
2974
2975         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2976
2977         type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
2978         if (type != BTRFS_EXTENT_DATA_REF_KEY)
2979                 goto out;
2980
2981         ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2982         if (btrfs_extent_refs(leaf, ei) !=
2983             btrfs_extent_data_ref_count(leaf, ref) ||
2984             btrfs_extent_data_ref_root(leaf, ref) !=
2985             root->root_key.objectid ||
2986             btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2987             btrfs_extent_data_ref_offset(leaf, ref) != offset)
2988                 goto out;
2989
2990         ret = 0;
2991 out:
2992         return ret;
2993 }
2994
2995 int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
2996                           u64 bytenr)
2997 {
2998         struct btrfs_path *path;
2999         int ret;
3000
3001         path = btrfs_alloc_path();
3002         if (!path)
3003                 return -ENOMEM;
3004
3005         do {
3006                 ret = check_committed_ref(root, path, objectid,
3007                                           offset, bytenr);
3008                 if (ret && ret != -ENOENT)
3009                         goto out;
3010
3011                 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
3012         } while (ret == -EAGAIN);
3013
3014 out:
3015         btrfs_free_path(path);
3016         if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3017                 WARN_ON(ret > 0);
3018         return ret;
3019 }
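
/*
 * Editor's hypothetical caller sketch: nocow-style write paths use this
 * check to decide whether an extent may be overwritten in place.  A
 * return of 0 means no other root, inode, or offset references the
 * bytes; anything else (including errors) forces a fallback to CoW.
 */
static bool example_may_overwrite(struct btrfs_root *root, u64 ino,
				  u64 file_offset, u64 disk_bytenr)
{
	return btrfs_cross_ref_exist(root, ino, file_offset, disk_bytenr) == 0;
}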
3020
3021 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3022                            struct btrfs_root *root,
3023                            struct extent_buffer *buf,
3024                            int full_backref, int inc)
3025 {
3026         struct btrfs_fs_info *fs_info = root->fs_info;
3027         u64 bytenr;
3028         u64 num_bytes;
3029         u64 parent;
3030         u64 ref_root;
3031         u32 nritems;
3032         struct btrfs_key key;
3033         struct btrfs_file_extent_item *fi;
3034         struct btrfs_ref generic_ref = { 0 };
3035         bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
3036         int i;
3037         int action;
3038         int level;
3039         int ret = 0;
3040
3041         if (btrfs_is_testing(fs_info))
3042                 return 0;
3043
3044         ref_root = btrfs_header_owner(buf);
3045         nritems = btrfs_header_nritems(buf);
3046         level = btrfs_header_level(buf);
3047
3048         if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
3049                 return 0;
3050
3051         if (full_backref)
3052                 parent = buf->start;
3053         else
3054                 parent = 0;
3055         if (inc)
3056                 action = BTRFS_ADD_DELAYED_REF;
3057         else
3058                 action = BTRFS_DROP_DELAYED_REF;
3059
3060         for (i = 0; i < nritems; i++) {
3061                 if (level == 0) {
3062                         btrfs_item_key_to_cpu(buf, &key, i);
3063                         if (key.type != BTRFS_EXTENT_DATA_KEY)
3064                                 continue;
3065                         fi = btrfs_item_ptr(buf, i,
3066                                             struct btrfs_file_extent_item);
3067                         if (btrfs_file_extent_type(buf, fi) ==
3068                             BTRFS_FILE_EXTENT_INLINE)
3069                                 continue;
3070                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
3071                         if (bytenr == 0)
3072                                 continue;
3073
3074                         num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
3075                         key.offset -= btrfs_file_extent_offset(buf, fi);
3076                         btrfs_init_generic_ref(&generic_ref, action, bytenr,
3077                                                num_bytes, parent);
3078                         generic_ref.real_root = root->root_key.objectid;
3079                         btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
3080                                             key.offset);
3081                         generic_ref.skip_qgroup = for_reloc;
3082                         if (inc)
3083                                 ret = btrfs_inc_extent_ref(trans, &generic_ref);
3084                         else
3085                                 ret = btrfs_free_extent(trans, &generic_ref);
3086                         if (ret)
3087                                 goto fail;