Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
[sfrench/cifs-2.6.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
22 #include <linux/sort.h>
23 #include <linux/rcupdate.h>
24 #include "compat.h"
25 #include "hash.h"
26 #include "crc32c.h"
27 #include "ctree.h"
28 #include "disk-io.h"
29 #include "print-tree.h"
30 #include "transaction.h"
31 #include "volumes.h"
32 #include "locking.h"
33 #include "ref-cache.h"
34
35 #define PENDING_EXTENT_INSERT 0
36 #define PENDING_EXTENT_DELETE 1
37 #define PENDING_BACKREF_UPDATE 2
38
/*
 * a deferred operation against the extent tree (insert, delete, or
 * backref update), queued on a list and processed in batches by the
 * helpers below (update_backrefs, insert_extents, ...)
 */
struct pending_extent_op {
	/* PENDING_EXTENT_INSERT, PENDING_EXTENT_DELETE or
	 * PENDING_BACKREF_UPDATE */
	int type;
	/* start byte of the extent this op applies to */
	u64 bytenr;
	/* length of the extent in bytes */
	u64 num_bytes;
	/* new parent the backref should be keyed on */
	u64 parent;
	/* parent the existing backref is currently keyed on */
	u64 orig_parent;
	/* generation to stamp into the updated ref */
	u64 generation;
	/* generation the existing ref is expected to carry */
	u64 orig_generation;
	/* tree level; compared against the ref's objectid in update_backrefs */
	int level;
	/* links this op onto an insert/update list */
	struct list_head list;
	/* counter used by insert_extents to tell when both items
	 * (extent item + backref) for this op have been inserted */
	int del;
};
51
52 static int finish_current_insert(struct btrfs_trans_handle *trans,
53                                  struct btrfs_root *extent_root, int all);
54 static int del_pending_extents(struct btrfs_trans_handle *trans,
55                                struct btrfs_root *extent_root, int all);
56 static int pin_down_bytes(struct btrfs_trans_handle *trans,
57                           struct btrfs_root *root,
58                           u64 bytenr, u64 num_bytes, int is_data);
59 static int update_block_group(struct btrfs_trans_handle *trans,
60                               struct btrfs_root *root,
61                               u64 bytenr, u64 num_bytes, int alloc,
62                               int mark_free);
63
64 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
65                           struct btrfs_root *extent_root, u64 alloc_bytes,
66                           u64 flags, int force);
67
68 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
69 {
70         return (cache->flags & bits) == bits;
71 }
72
73 /*
74  * this adds the block group to the fs_info rb tree for the block group
75  * cache
76  */
77 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
78                                 struct btrfs_block_group_cache *block_group)
79 {
80         struct rb_node **p;
81         struct rb_node *parent = NULL;
82         struct btrfs_block_group_cache *cache;
83
84         spin_lock(&info->block_group_cache_lock);
85         p = &info->block_group_cache_tree.rb_node;
86
87         while (*p) {
88                 parent = *p;
89                 cache = rb_entry(parent, struct btrfs_block_group_cache,
90                                  cache_node);
91                 if (block_group->key.objectid < cache->key.objectid) {
92                         p = &(*p)->rb_left;
93                 } else if (block_group->key.objectid > cache->key.objectid) {
94                         p = &(*p)->rb_right;
95                 } else {
96                         spin_unlock(&info->block_group_cache_lock);
97                         return -EEXIST;
98                 }
99         }
100
101         rb_link_node(&block_group->cache_node, parent, p);
102         rb_insert_color(&block_group->cache_node,
103                         &info->block_group_cache_tree);
104         spin_unlock(&info->block_group_cache_lock);
105
106         return 0;
107 }
108
/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		/* [start, end] is the byte range this group covers */
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			/*
			 * for at-or-after searches remember the closest
			 * group starting after bytenr seen so far
			 */
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			/* bytenr falls inside this group's byte range */
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			/* exact match on the group's starting byte */
			ret = cache;
			break;
		}
	}
	/* take a reference under the lock; caller drops with put_block_group */
	if (ret)
		atomic_inc(&ret->count);
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}
151
/*
 * this is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
 */
static int add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size;
	int ret;

	mutex_lock(&info->pinned_mutex);
	while (start < end) {
		/* locate the next pinned (EXTENT_DIRTY) range from start on */
		ret = find_first_extent_bit(&info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		if (extent_start == start) {
			/* pinned range begins right here; skip past it */
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			/* the gap before the pinned range is usable now */
			size = extent_start - start;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			/* next pinned range starts past end; done scanning */
			break;
		}
	}

	/* whatever remains of [start, end) is entirely free */
	if (start < end) {
		size = end - start;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}
	mutex_unlock(&info->pinned_mutex);

	return 0;
}
193
194 static int remove_sb_from_cache(struct btrfs_root *root,
195                                 struct btrfs_block_group_cache *cache)
196 {
197         u64 bytenr;
198         u64 *logical;
199         int stripe_len;
200         int i, nr, ret;
201
202         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
203                 bytenr = btrfs_sb_offset(i);
204                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
205                                        cache->key.objectid, bytenr, 0,
206                                        &logical, &nr, &stripe_len);
207                 BUG_ON(ret);
208                 while (nr--) {
209                         btrfs_remove_free_space(cache, logical[nr],
210                                                 stripe_len);
211                 }
212                 kfree(logical);
213         }
214         return 0;
215 }
216
/*
 * walk the extent tree over this block group's range and record every
 * gap between extent items as free space in the group's cache
 */
static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 last;

	if (!block_group)
		return 0;

	/* extent items always live in the extent tree */
	root = root->fs_info->extent_root;

	/* nothing to do if the cache was already built */
	if (block_group->cached)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 2;
	/*
	 * we get into deadlocks with paths held by callers of this function.
	 * since the alloc_mutex is protecting things right now, just
	 * skip the locking here
	 */
	path->skip_locking = 1;
	/* never treat space below the first superblock copy as free */
	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
	key.objectid = last;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		/* ran off the end of this leaf; advance to the next one */
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			if (ret == 0)
				continue;
			else
				break;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid)
			goto next;

		/* stop once we walk past the end of this block group */
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			/* the hole since the previous extent is free space */
			add_new_free_space(block_group, root->fs_info, last,
					   key.objectid);

			last = key.objectid + key.offset;
		}
next:
		path->slots[0]++;
	}

	/* the tail of the group past the last extent item is free too */
	add_new_free_space(block_group, root->fs_info, last,
			   block_group->key.objectid +
			   block_group->key.offset);

	/* superblock mirrors must never show up as free space */
	remove_sb_from_cache(root, block_group);
	block_group->cached = 1;
	ret = 0;
err:
	btrfs_free_path(path);
	return ret;
}
295
296 /*
297  * return the block group that starts at or after bytenr
298  */
299 static struct btrfs_block_group_cache *
300 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
301 {
302         struct btrfs_block_group_cache *cache;
303
304         cache = block_group_cache_tree_search(info, bytenr, 0);
305
306         return cache;
307 }
308
309 /*
310  * return the block group that contains teh given bytenr
311  */
312 struct btrfs_block_group_cache *btrfs_lookup_block_group(
313                                                  struct btrfs_fs_info *info,
314                                                  u64 bytenr)
315 {
316         struct btrfs_block_group_cache *cache;
317
318         cache = block_group_cache_tree_search(info, bytenr, 1);
319
320         return cache;
321 }
322
323 static inline void put_block_group(struct btrfs_block_group_cache *cache)
324 {
325         if (atomic_dec_and_test(&cache->count))
326                 kfree(cache);
327 }
328
329 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
330                                                   u64 flags)
331 {
332         struct list_head *head = &info->space_info;
333         struct btrfs_space_info *found;
334
335         rcu_read_lock();
336         list_for_each_entry_rcu(found, head, list) {
337                 if (found->flags == flags) {
338                         rcu_read_unlock();
339                         return found;
340                 }
341         }
342         rcu_read_unlock();
343         return NULL;
344 }
345
346 /*
347  * after adding space to the filesystem, we need to clear the full flags
348  * on all the space infos.
349  */
350 void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
351 {
352         struct list_head *head = &info->space_info;
353         struct btrfs_space_info *found;
354
355         rcu_read_lock();
356         list_for_each_entry_rcu(found, head, list)
357                 found->full = 0;
358         rcu_read_unlock();
359 }
360
361 static u64 div_factor(u64 num, int factor)
362 {
363         if (factor == 10)
364                 return num;
365         num *= factor;
366         do_div(num, 10);
367         return num;
368 }
369
/*
 * search for a metadata block group with room to spare, starting at
 * max(search_hint, search_start).  Returns the group's starting bytenr,
 * or 0 when nothing suitable is found.
 * NOTE(review): the owner parameter is not used by the visible logic.
 */
u64 btrfs_find_block_group(struct btrfs_root *root,
			   u64 search_start, u64 search_hint, int owner)
{
	struct btrfs_block_group_cache *cache;
	u64 used;
	u64 last = max(search_hint, search_start);
	u64 group_start = 0;
	int full_search = 0;
	/* first pass only accepts groups under 90% committed */
	int factor = 9;
	int wrapped = 0;
again:
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		/* read-only groups are only considered on the full search */
		if ((full_search || !cache->ro) &&
		    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
			if (used + cache->pinned + cache->reserved <
			    div_factor(cache->key.offset, factor)) {
				group_start = cache->key.objectid;
				spin_unlock(&cache->lock);
				put_block_group(cache);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		put_block_group(cache);
		cond_resched();
	}
	/* hit the end of the tree: wrap around once from search_start */
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	/* last resort: retry accepting anything that isn't completely full */
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return group_start;
}
418
419 /* simple helper to search for an existing extent at a given offset */
420 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
421 {
422         int ret;
423         struct btrfs_key key;
424         struct btrfs_path *path;
425
426         path = btrfs_alloc_path();
427         BUG_ON(!path);
428         key.objectid = start;
429         key.offset = len;
430         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
431         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
432                                 0, 0);
433         btrfs_free_path(path);
434         return ret;
435 }
436
437 /*
438  * Back reference rules.  Back refs have three main goals:
439  *
440  * 1) differentiate between all holders of references to an extent so that
441  *    when a reference is dropped we can make sure it was a valid reference
442  *    before freeing the extent.
443  *
444  * 2) Provide enough information to quickly find the holders of an extent
445  *    if we notice a given block is corrupted or bad.
446  *
447  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
448  *    maintenance.  This is actually the same as #2, but with a slightly
449  *    different use case.
450  *
451  * File extents can be referenced by:
452  *
453  * - multiple snapshots, subvolumes, or different generations in one subvol
454  * - different files inside a single subvolume
455  * - different offsets inside a file (bookend extents in file.c)
456  *
457  * The extent ref structure has fields for:
458  *
459  * - Objectid of the subvolume root
460  * - Generation number of the tree holding the reference
461  * - objectid of the file holding the reference
 * - number of references held by the parent node (always 1 for tree blocks)
463  *
464  * Btree leaf may hold multiple references to a file extent. In most cases,
465  * these references are from same file and the corresponding offsets inside
466  * the file are close together.
467  *
468  * When a file extent is allocated the fields are filled in:
469  *     (root_key.objectid, trans->transid, inode objectid, 1)
470  *
471  * When a leaf is cow'd new references are added for every file extent found
472  * in the leaf.  It looks similar to the create case, but trans->transid will
473  * be different when the block is cow'd.
474  *
475  *     (root_key.objectid, trans->transid, inode objectid,
476  *      number of references in the leaf)
477  *
478  * When a file extent is removed either during snapshot deletion or
479  * file truncation, we find the corresponding back reference and check
480  * the following fields:
481  *
482  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
483  *      inode objectid)
484  *
485  * Btree extents can be referenced by:
486  *
487  * - Different subvolumes
488  * - Different generations of the same subvolume
489  *
490  * When a tree block is created, back references are inserted:
491  *
492  * (root->root_key.objectid, trans->transid, level, 1)
493  *
494  * When a tree block is cow'd, new back references are added for all the
495  * blocks it points to. If the tree block isn't in reference counted root,
496  * the old back references are removed. These new back references are of
497  * the form (trans->transid will have increased since creation):
498  *
499  * (root->root_key.objectid, trans->transid, level, 1)
500  *
 * When a backref is being deleted, the following fields are checked:
502  *
503  * if backref was for a tree root:
504  *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
505  * else
506  *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
507  *
508  * Back Reference Key composing:
509  *
510  * The key objectid corresponds to the first byte in the extent, the key
511  * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
 * byte of the parent extent. If an extent is a tree root, the key offset
 * is set to the key objectid.
514  */
515
516 static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans,
517                                           struct btrfs_root *root,
518                                           struct btrfs_path *path,
519                                           u64 bytenr, u64 parent,
520                                           u64 ref_root, u64 ref_generation,
521                                           u64 owner_objectid, int del)
522 {
523         struct btrfs_key key;
524         struct btrfs_extent_ref *ref;
525         struct extent_buffer *leaf;
526         u64 ref_objectid;
527         int ret;
528
529         key.objectid = bytenr;
530         key.type = BTRFS_EXTENT_REF_KEY;
531         key.offset = parent;
532
533         ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
534         if (ret < 0)
535                 goto out;
536         if (ret > 0) {
537                 ret = -ENOENT;
538                 goto out;
539         }
540
541         leaf = path->nodes[0];
542         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
543         ref_objectid = btrfs_ref_objectid(leaf, ref);
544         if (btrfs_ref_root(leaf, ref) != ref_root ||
545             btrfs_ref_generation(leaf, ref) != ref_generation ||
546             (ref_objectid != owner_objectid &&
547              ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
548                 ret = -EIO;
549                 WARN_ON(1);
550                 goto out;
551         }
552         ret = 0;
553 out:
554         return ret;
555 }
556
/*
 * updates all the backrefs that are pending on update_list for the
 * extent_root
 */
static noinline int update_backrefs(struct btrfs_trans_handle *trans,
				    struct btrfs_root *extent_root,
				    struct btrfs_path *path,
				    struct list_head *update_list)
{
	struct btrfs_key key;
	struct btrfs_extent_ref *ref;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct pending_extent_op *op;
	struct extent_buffer *leaf;
	int ret = 0;
	struct list_head *cur = update_list->next;
	u64 ref_objectid;
	u64 ref_root = extent_root->root_key.objectid;

	op = list_entry(cur, struct pending_extent_op, list);

search:
	/* look the ref up under its old key: (bytenr, REF, orig_parent) */
	key.objectid = op->bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = op->orig_parent;

	ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
	BUG_ON(ret);

	leaf = path->nodes[0];

loop:
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);

	ref_objectid = btrfs_ref_objectid(leaf, ref);

	/* the ref we found must match what the pending op recorded */
	if (btrfs_ref_root(leaf, ref) != ref_root ||
	    btrfs_ref_generation(leaf, ref) != op->orig_generation ||
	    (ref_objectid != op->level &&
	     ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
		printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, "
		       "root %llu, owner %u\n",
		       (unsigned long long)op->bytenr,
		       (unsigned long long)op->orig_parent,
		       (unsigned long long)ref_root, op->level);
		btrfs_print_leaf(extent_root, leaf);
		BUG();
	}

	/* rekey the item to the new parent and stamp the new generation */
	key.objectid = op->bytenr;
	key.offset = op->parent;
	key.type = BTRFS_EXTENT_REF_KEY;
	ret = btrfs_set_item_key_safe(trans, extent_root, path, &key);
	BUG_ON(ret);
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
	btrfs_set_ref_generation(leaf, ref, op->generation);

	cur = cur->next;

	/* this op is done: drop it and release its extent_ins range */
	list_del_init(&op->list);
	unlock_extent(&info->extent_ins, op->bytenr,
		      op->bytenr + op->num_bytes - 1, GFP_NOFS);
	kfree(op);

	if (cur == update_list) {
		btrfs_mark_buffer_dirty(path->nodes[0]);
		btrfs_release_path(extent_root, path);
		goto out;
	}

	op = list_entry(cur, struct pending_extent_op, list);

	/*
	 * if the next op's ref lives later in this same leaf we can
	 * update it without searching from the top again
	 */
	path->slots[0]++;
	while (path->slots[0] < btrfs_header_nritems(leaf)) {
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid == op->bytenr &&
		    key.type == BTRFS_EXTENT_REF_KEY)
			goto loop;
		path->slots[0]++;
	}

	/* not in this leaf; drop the path and search again */
	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(extent_root, path);
	goto search;

out:
	return 0;
}
645
/*
 * batch-insert the EXTENT_ITEM and EXTENT_REF items for every pending op
 * on insert_list.  nr is the number of ops, so nr * 2 items total are
 * inserted; each op is freed (and its extent_ins range unlocked) once
 * both of its items have been written.
 */
static noinline int insert_extents(struct btrfs_trans_handle *trans,
				   struct btrfs_root *extent_root,
				   struct btrfs_path *path,
				   struct list_head *insert_list, int nr)
{
	struct btrfs_key *keys;
	u32 *data_size;
	struct pending_extent_op *op;
	struct extent_buffer *leaf;
	struct list_head *cur = insert_list->next;
	struct btrfs_fs_info *info = extent_root->fs_info;
	u64 ref_root = extent_root->root_key.objectid;
	int i = 0, last = 0, ret;
	int total = nr * 2;

	if (!nr)
		return 0;

	keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS);
	if (!keys)
		return -ENOMEM;

	data_size = kzalloc(total * sizeof(u32), GFP_NOFS);
	if (!data_size) {
		kfree(keys);
		return -ENOMEM;
	}

	/* build key pairs: an EXTENT_ITEM followed by its EXTENT_REF */
	list_for_each_entry(op, insert_list, list) {
		keys[i].objectid = op->bytenr;
		keys[i].offset = op->num_bytes;
		keys[i].type = BTRFS_EXTENT_ITEM_KEY;
		data_size[i] = sizeof(struct btrfs_extent_item);
		i++;

		keys[i].objectid = op->bytenr;
		keys[i].offset = op->parent;
		keys[i].type = BTRFS_EXTENT_REF_KEY;
		data_size[i] = sizeof(struct btrfs_extent_ref);
		i++;
	}

	op = list_entry(cur, struct pending_extent_op, list);
	i = 0;
	while (i < total) {
		int c;
		/* insert as many consecutive items as fit in one leaf */
		ret = btrfs_insert_some_items(trans, extent_root, path,
					      keys+i, data_size+i, total-i);
		BUG_ON(ret < 0);

		/* the retried last item must go in alone (see below) */
		if (last && ret > 1)
			BUG();

		leaf = path->nodes[0];
		for (c = 0; c < ret; c++) {
			int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY;

			/*
			 * if the first item we inserted was a backref, then
			 * the EXTENT_ITEM will be the odd c's, else it will
			 * be the even c's
			 */
			if ((ref_first && (c % 2)) ||
			    (!ref_first && !(c % 2))) {
				struct btrfs_extent_item *itm;

				itm = btrfs_item_ptr(leaf, path->slots[0] + c,
						     struct btrfs_extent_item);
				btrfs_set_extent_refs(path->nodes[0], itm, 1);
				op->del++;
			} else {
				struct btrfs_extent_ref *ref;

				ref = btrfs_item_ptr(leaf, path->slots[0] + c,
						     struct btrfs_extent_ref);
				btrfs_set_ref_root(leaf, ref, ref_root);
				btrfs_set_ref_generation(leaf, ref,
							 op->generation);
				btrfs_set_ref_objectid(leaf, ref, op->level);
				btrfs_set_ref_num_refs(leaf, ref, 1);
				op->del++;
			}

			/*
			 * using del to see when its ok to free up the
			 * pending_extent_op.  In the case where we insert the
			 * last item on the list in order to help do batching
			 * we need to not free the extent op until we actually
			 * insert the extent_item
			 */
			if (op->del == 2) {
				unlock_extent(&info->extent_ins, op->bytenr,
					      op->bytenr + op->num_bytes - 1,
					      GFP_NOFS);
				cur = cur->next;
				list_del_init(&op->list);
				kfree(op);
				if (cur != insert_list)
					op = list_entry(cur,
						struct pending_extent_op,
						list);
			}
		}
		btrfs_mark_buffer_dirty(leaf);
		btrfs_release_path(extent_root, path);

		/*
		 * Ok backref's and items usually go right next to eachother,
		 * but if we could only insert 1 item that means that we
		 * inserted on the end of a leaf, and we have no idea what may
		 * be on the next leaf so we just play it safe.  In order to
		 * try and help this case we insert the last thing on our
		 * insert list so hopefully it will end up being the last
		 * thing on the leaf and everything else will be before it,
		 * which will let us insert a whole bunch of items at the same
		 * time.
		 */
		if (ret == 1 && !last && (i + ret < total)) {
			/*
			 * last: where we will pick up the next time around
			 * i: our current key to insert, will be total - 1
			 * cur: the current op we are screwing with
			 * op: duh
			 */
			last = i + ret;
			i = total - 1;
			cur = insert_list->prev;
			op = list_entry(cur, struct pending_extent_op, list);
		} else if (last) {
			/*
			 * ok we successfully inserted the last item on the
			 * list, lets reset everything
			 *
			 * i: our current key to insert, so where we left off
			 *    last time
			 * last: done with this
			 * cur: the op we are messing with
			 * op: duh
			 * total: since we inserted the last key, we need to
			 *        decrement total so we dont overflow
			 */
			i = last;
			last = 0;
			total--;
			if (i < total) {
				cur = insert_list->next;
				op = list_entry(cur, struct pending_extent_op,
						list);
			}
		} else {
			i += ret;
		}

		cond_resched();
	}
	ret = 0;
	kfree(keys);
	kfree(data_size);
	return ret;
}
806
/*
 * Insert a backref item (BTRFS_EXTENT_REF_KEY) for the extent at @bytenr,
 * keyed on @parent.
 *
 * If no ref item with this (bytenr, parent) key exists yet, a new one is
 * created describing @ref_root/@ref_generation/@owner_objectid with a
 * refcount of 1.  If the key already exists (-EEXIST from the insert), the
 * existing item must describe the same root and generation; its refcount is
 * bumped and, when a different owner already shares it, the objectid is
 * collapsed to BTRFS_MULTIPLE_OBJECTIDS.
 *
 * Returns 0 on success, -EIO if an existing ref disagrees on root or
 * generation, or the raw error from btrfs_insert_empty_item.  The path is
 * always released before returning.
 */
static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
                                          u64 bytenr, u64 parent,
                                          u64 ref_root, u64 ref_generation,
                                          u64 owner_objectid)
{
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_extent_ref *ref;
        u32 num_refs;
        int ret;

        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_REF_KEY;
        key.offset = parent;

        ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
        if (ret == 0) {
                /* fresh item: fill in all backref fields */
                leaf = path->nodes[0];
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_ref);
                btrfs_set_ref_root(leaf, ref, ref_root);
                btrfs_set_ref_generation(leaf, ref, ref_generation);
                btrfs_set_ref_objectid(leaf, ref, owner_objectid);
                btrfs_set_ref_num_refs(leaf, ref, 1);
        } else if (ret == -EEXIST) {
                u64 existing_owner;
                /* only file data refs can legitimately share a key */
                BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
                leaf = path->nodes[0];
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_ref);
                if (btrfs_ref_root(leaf, ref) != ref_root ||
                    btrfs_ref_generation(leaf, ref) != ref_generation) {
                        /* same key but a different root/gen: metadata
                         * corruption, bail out loudly */
                        ret = -EIO;
                        WARN_ON(1);
                        goto out;
                }

                num_refs = btrfs_ref_num_refs(leaf, ref);
                BUG_ON(num_refs == 0);
                btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);

                /* more than one owner now shares this backref */
                existing_owner = btrfs_ref_objectid(leaf, ref);
                if (existing_owner != owner_objectid &&
                    existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
                        btrfs_set_ref_objectid(leaf, ref,
                                        BTRFS_MULTIPLE_OBJECTIDS);
                }
                ret = 0;
        } else {
                goto out;
        }
        btrfs_mark_buffer_dirty(path->nodes[0]);
out:
        btrfs_release_path(root, path);
        return ret;
}
865
866 static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
867                                           struct btrfs_root *root,
868                                           struct btrfs_path *path)
869 {
870         struct extent_buffer *leaf;
871         struct btrfs_extent_ref *ref;
872         u32 num_refs;
873         int ret = 0;
874
875         leaf = path->nodes[0];
876         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
877         num_refs = btrfs_ref_num_refs(leaf, ref);
878         BUG_ON(num_refs == 0);
879         num_refs -= 1;
880         if (num_refs == 0) {
881                 ret = btrfs_del_item(trans, root, path);
882         } else {
883                 btrfs_set_ref_num_refs(leaf, ref, num_refs);
884                 btrfs_mark_buffer_dirty(leaf);
885         }
886         btrfs_release_path(root, path);
887         return ret;
888 }
889
#ifdef BIO_RW_DISCARD
/*
 * Ask the block device to discard (TRIM) a byte range.  Offsets and
 * lengths are converted to 512-byte sectors for blkdev_issue_discard.
 * Only compiled in when the block layer supports BIO_RW_DISCARD.
 */
static void btrfs_issue_discard(struct block_device *bdev,
                                u64 start, u64 len)
{
        blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
}
#endif
897
/*
 * Discard the physical sectors backing a logical extent.
 *
 * The logical range [bytenr, bytenr + num_bytes) is mapped through the
 * chunk tree and a discard is issued to every stripe that holds a copy.
 * On kernels without BIO_RW_DISCARD this is a no-op returning 0.
 *
 * Returns the btrfs_map_block result (0 on success); discard failures
 * from individual devices are not reported.
 */
static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
                                u64 num_bytes)
{
#ifdef BIO_RW_DISCARD
        int ret;
        u64 map_length = num_bytes;
        struct btrfs_multi_bio *multi = NULL;

        /* Tell the block device(s) that the sectors can be discarded */
        ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
                              bytenr, &map_length, &multi, 0);
        if (!ret) {
                struct btrfs_bio_stripe *stripe = multi->stripes;
                int i;

                /* the mapping may cover more than we asked for; clamp it */
                if (map_length > num_bytes)
                        map_length = num_bytes;

                for (i = 0; i < multi->num_stripes; i++, stripe++) {
                        btrfs_issue_discard(stripe->dev->bdev,
                                            stripe->physical,
                                            map_length);
                }
                kfree(multi);
        }

        return ret;
#else
        return 0;
#endif
}
929
/*
 * Process a list of pending extent deletions against the extent tree.
 *
 * For each pending_extent_op on @del_list we look up its backref, drop
 * the extent item's refcount, and — when the count hits zero — pin the
 * bytes, batch-delete the extent item plus backref, and update the block
 * group and space accounting.  Adjacent list entries whose extent+backref
 * items sit next to each other in the same leaf are folded into a single
 * btrfs_del_items call.  Each completed op is unlocked in the extent_ins
 * tree, removed from the list and freed.
 *
 * Loops until @del_list is empty.  Returns 0 on success or the error from
 * the initial backref lookup; most internal failures BUG().
 */
static noinline int free_extents(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *extent_root,
                                 struct list_head *del_list)
{
        struct btrfs_fs_info *info = extent_root->fs_info;
        struct btrfs_path *path;
        struct btrfs_key key, found_key;
        struct extent_buffer *leaf;
        struct list_head *cur;
        struct pending_extent_op *op;
        struct btrfs_extent_item *ei;
        int ret, num_to_del, extent_slot = 0, found_extent = 0;
        u32 refs;
        u64 bytes_freed = 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        path->reada = 1;

search:
        /* search for the backref for the current ref we want to delete */
        cur = del_list->next;
        op = list_entry(cur, struct pending_extent_op, list);
        ret = lookup_extent_backref(trans, extent_root, path, op->bytenr,
                                    op->orig_parent,
                                    extent_root->root_key.objectid,
                                    op->orig_generation, op->level, 1);
        if (ret) {
                printk(KERN_ERR "btrfs unable to find backref byte nr %llu "
                       "root %llu gen %llu owner %u\n",
                       (unsigned long long)op->bytenr,
                       (unsigned long long)extent_root->root_key.objectid,
                       (unsigned long long)op->orig_generation, op->level);
                btrfs_print_leaf(extent_root, path->nodes[0]);
                WARN_ON(1);
                goto out;
        }

        extent_slot = path->slots[0];
        num_to_del = 1;
        found_extent = 0;

        /*
         * if we aren't the first item on the leaf we can move back one and see
         * if our ref is right next to our extent item
         */
        if (likely(extent_slot)) {
                extent_slot--;
                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
                                      extent_slot);
                if (found_key.objectid == op->bytenr &&
                    found_key.type == BTRFS_EXTENT_ITEM_KEY &&
                    found_key.offset == op->num_bytes) {
                        num_to_del++;
                        found_extent = 1;
                }
        }

        /*
         * if we didn't find the extent we need to delete the backref and then
         * search for the extent item key so we can update its ref count
         */
        if (!found_extent) {
                key.objectid = op->bytenr;
                key.type = BTRFS_EXTENT_ITEM_KEY;
                key.offset = op->num_bytes;

                ret = remove_extent_backref(trans, extent_root, path);
                BUG_ON(ret);
                btrfs_release_path(extent_root, path);
                ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
                BUG_ON(ret);
                extent_slot = path->slots[0];
        }

        /* this is where we update the ref count for the extent */
        leaf = path->nodes[0];
        ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item);
        refs = btrfs_extent_refs(leaf, ei);
        BUG_ON(refs == 0);
        refs--;
        btrfs_set_extent_refs(leaf, ei, refs);

        btrfs_mark_buffer_dirty(leaf);

        /*
         * This extent needs deleting.  The reason cur_slot is extent_slot +
         * num_to_del is because extent_slot points to the slot where the extent
         * is, and if the backref was not right next to the extent we will be
         * deleting at least 1 item, and will want to start searching at the
         * slot directly next to extent_slot.  However if we did find the
         * backref next to the extent item then we will be deleting at least 2
         * items and will want to start searching directly after the ref slot
         */
        if (!refs) {
                struct list_head *pos, *n, *end;
                int cur_slot = extent_slot+num_to_del;
                u64 super_used;
                u64 root_used;

                path->slots[0] = extent_slot;
                bytes_freed = op->num_bytes;

                mutex_lock(&info->pinned_mutex);
                ret = pin_down_bytes(trans, extent_root, op->bytenr,
                                     op->num_bytes, op->level >=
                                     BTRFS_FIRST_FREE_OBJECTID);
                mutex_unlock(&info->pinned_mutex);
                BUG_ON(ret < 0);
                op->del = ret;

                /*
                 * we need to see if we can delete multiple things at once, so
                 * start looping through the list of extents we are wanting to
                 * delete and see if their extent/backref's are right next to
                 * each other and the extents only have 1 ref
                 */
                for (pos = cur->next; pos != del_list; pos = pos->next) {
                        struct pending_extent_op *tmp;

                        tmp = list_entry(pos, struct pending_extent_op, list);

                        /* we only want to delete extent+ref at this stage */
                        if (cur_slot >= btrfs_header_nritems(leaf) - 1)
                                break;

                        btrfs_item_key_to_cpu(leaf, &found_key, cur_slot);
                        if (found_key.objectid != tmp->bytenr ||
                            found_key.type != BTRFS_EXTENT_ITEM_KEY ||
                            found_key.offset != tmp->num_bytes)
                                break;

                        /* check to make sure this extent only has one ref */
                        ei = btrfs_item_ptr(leaf, cur_slot,
                                            struct btrfs_extent_item);
                        if (btrfs_extent_refs(leaf, ei) != 1)
                                break;

                        btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1);
                        if (found_key.objectid != tmp->bytenr ||
                            found_key.type != BTRFS_EXTENT_REF_KEY ||
                            found_key.offset != tmp->orig_parent)
                                break;

                        /*
                         * the ref is right next to the extent, we can set the
                         * ref count to 0 since we will delete them both now
                         */
                        btrfs_set_extent_refs(leaf, ei, 0);

                        /* pin down the bytes for this extent */
                        mutex_lock(&info->pinned_mutex);
                        ret = pin_down_bytes(trans, extent_root, tmp->bytenr,
                                             tmp->num_bytes, tmp->level >=
                                             BTRFS_FIRST_FREE_OBJECTID);
                        mutex_unlock(&info->pinned_mutex);
                        BUG_ON(ret < 0);

                        /*
                         * use the del field to tell if we need to go ahead and
                         * free up the extent when we delete the item or not.
                         */
                        tmp->del = ret;
                        bytes_freed += tmp->num_bytes;

                        num_to_del += 2;
                        cur_slot += 2;
                }
                end = pos;

                /* update the free space counters */
                spin_lock(&info->delalloc_lock);
                super_used = btrfs_super_bytes_used(&info->super_copy);
                btrfs_set_super_bytes_used(&info->super_copy,
                                           super_used - bytes_freed);

                root_used = btrfs_root_used(&extent_root->root_item);
                btrfs_set_root_used(&extent_root->root_item,
                                    root_used - bytes_freed);
                spin_unlock(&info->delalloc_lock);

                /* delete the items */
                ret = btrfs_del_items(trans, extent_root, path,
                                      path->slots[0], num_to_del);
                BUG_ON(ret);

                /*
                 * loop through the extents we deleted and do the cleanup work
                 * on them
                 */
                for (pos = cur, n = pos->next; pos != end;
                     pos = n, n = pos->next) {
                        struct pending_extent_op *tmp;
                        tmp = list_entry(pos, struct pending_extent_op, list);

                        /*
                         * remember tmp->del tells us whether or not we pinned
                         * down the extent
                         */
                        ret = update_block_group(trans, extent_root,
                                                 tmp->bytenr, tmp->num_bytes, 0,
                                                 tmp->del);
                        BUG_ON(ret);

                        list_del_init(&tmp->list);
                        unlock_extent(&info->extent_ins, tmp->bytenr,
                                      tmp->bytenr + tmp->num_bytes - 1,
                                      GFP_NOFS);
                        kfree(tmp);
                }
        } else if (refs && found_extent) {
                /*
                 * the ref and extent were right next to each other, but the
                 * extent still has a ref, so just free the backref and keep
                 * going
                 */
                ret = remove_extent_backref(trans, extent_root, path);
                BUG_ON(ret);

                list_del_init(&op->list);
                unlock_extent(&info->extent_ins, op->bytenr,
                              op->bytenr + op->num_bytes - 1, GFP_NOFS);
                kfree(op);
        } else {
                /*
                 * the extent has multiple refs and the backref we were looking
                 * for was not right next to it, so just unlock and go next,
                 * we're good to go
                 */
                list_del_init(&op->list);
                unlock_extent(&info->extent_ins, op->bytenr,
                              op->bytenr + op->num_bytes - 1, GFP_NOFS);
                kfree(op);
        }

        btrfs_release_path(extent_root, path);
        if (!list_empty(del_list))
                goto search;

out:
        btrfs_free_path(path);
        return ret;
}
1174
/*
 * Move a backref for @bytenr from (orig_parent, orig_root, orig_generation)
 * to (parent, ref_root, ref_generation).
 *
 * For the extent root itself the update cannot be done inline (we may
 * already be modifying the extent tree), so it is queued: if a pending op
 * for this range already exists in the extent_ins tree its parent and
 * generation are rewritten in place, otherwise a PENDING_BACKREF_UPDATE op
 * is allocated and stashed in the range's state private.  For all other
 * roots the old backref is removed and the new one inserted directly,
 * then any pending inserts/deletes are flushed.
 */
static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root, u64 bytenr,
                                     u64 orig_parent, u64 parent,
                                     u64 orig_root, u64 ref_root,
                                     u64 orig_generation, u64 ref_generation,
                                     u64 owner_objectid)
{
        int ret;
        struct btrfs_root *extent_root = root->fs_info->extent_root;
        struct btrfs_path *path;

        if (root == root->fs_info->extent_root) {
                struct pending_extent_op *extent_op;
                u64 num_bytes;

                /* extent root blocks store the level in owner_objectid */
                BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
                num_bytes = btrfs_level_size(root, (int)owner_objectid);
                mutex_lock(&root->fs_info->extent_ins_mutex);
                if (test_range_bit(&root->fs_info->extent_ins, bytenr,
                                bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
                        u64 priv;
                        /* an op is already queued for this range; the state
                         * private holds the pending_extent_op pointer */
                        ret = get_state_private(&root->fs_info->extent_ins,
                                                bytenr, &priv);
                        BUG_ON(ret);
                        extent_op = (struct pending_extent_op *)
                                                        (unsigned long)priv;
                        BUG_ON(extent_op->parent != orig_parent);
                        BUG_ON(extent_op->generation != orig_generation);

                        extent_op->parent = parent;
                        extent_op->generation = ref_generation;
                } else {
                        /* nothing queued yet: record a new pending update */
                        extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
                        BUG_ON(!extent_op);

                        extent_op->type = PENDING_BACKREF_UPDATE;
                        extent_op->bytenr = bytenr;
                        extent_op->num_bytes = num_bytes;
                        extent_op->parent = parent;
                        extent_op->orig_parent = orig_parent;
                        extent_op->generation = ref_generation;
                        extent_op->orig_generation = orig_generation;
                        extent_op->level = (int)owner_objectid;
                        INIT_LIST_HEAD(&extent_op->list);
                        extent_op->del = 0;

                        set_extent_bits(&root->fs_info->extent_ins,
                                        bytenr, bytenr + num_bytes - 1,
                                        EXTENT_WRITEBACK, GFP_NOFS);
                        set_state_private(&root->fs_info->extent_ins,
                                          bytenr, (unsigned long)extent_op);
                }
                mutex_unlock(&root->fs_info->extent_ins_mutex);
                return 0;
        }

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        ret = lookup_extent_backref(trans, extent_root, path,
                                    bytenr, orig_parent, orig_root,
                                    orig_generation, owner_objectid, 1);
        if (ret)
                goto out;
        ret = remove_extent_backref(trans, extent_root, path);
        if (ret)
                goto out;
        ret = insert_extent_backref(trans, extent_root, path, bytenr,
                                    parent, ref_root, ref_generation,
                                    owner_objectid);
        BUG_ON(ret);
        finish_current_insert(trans, extent_root, 0);
        del_pending_extents(trans, extent_root, 0);
out:
        btrfs_free_path(path);
        return ret;
}
1252
1253 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1254                             struct btrfs_root *root, u64 bytenr,
1255                             u64 orig_parent, u64 parent,
1256                             u64 ref_root, u64 ref_generation,
1257                             u64 owner_objectid)
1258 {
1259         int ret;
1260         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1261             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1262                 return 0;
1263         ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
1264                                         parent, ref_root, ref_root,
1265                                         ref_generation, ref_generation,
1266                                         owner_objectid);
1267         return ret;
1268 }
1269
1270 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1271                                   struct btrfs_root *root, u64 bytenr,
1272                                   u64 orig_parent, u64 parent,
1273                                   u64 orig_root, u64 ref_root,
1274                                   u64 orig_generation, u64 ref_generation,
1275                                   u64 owner_objectid)
1276 {
1277         struct btrfs_path *path;
1278         int ret;
1279         struct btrfs_key key;
1280         struct extent_buffer *l;
1281         struct btrfs_extent_item *item;
1282         u32 refs;
1283
1284         path = btrfs_alloc_path();
1285         if (!path)
1286                 return -ENOMEM;
1287
1288         path->reada = 1;
1289         key.objectid = bytenr;
1290         key.type = BTRFS_EXTENT_ITEM_KEY;
1291         key.offset = (u64)-1;
1292
1293         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1294                                 0, 1);
1295         if (ret < 0)
1296                 return ret;
1297         BUG_ON(ret == 0 || path->slots[0] == 0);
1298
1299         path->slots[0]--;
1300         l = path->nodes[0];
1301
1302         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1303         if (key.objectid != bytenr) {
1304                 btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]);
1305                 printk(KERN_ERR "btrfs wanted %llu found %llu\n",
1306                        (unsigned long long)bytenr,
1307                        (unsigned long long)key.objectid);
1308                 BUG();
1309         }
1310         BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
1311
1312         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1313         refs = btrfs_extent_refs(l, item);
1314         btrfs_set_extent_refs(l, item, refs + 1);
1315         btrfs_mark_buffer_dirty(path->nodes[0]);
1316
1317         btrfs_release_path(root->fs_info->extent_root, path);
1318
1319         path->reada = 1;
1320         ret = insert_extent_backref(trans, root->fs_info->extent_root,
1321                                     path, bytenr, parent,
1322                                     ref_root, ref_generation,
1323                                     owner_objectid);
1324         BUG_ON(ret);
1325         finish_current_insert(trans, root->fs_info->extent_root, 0);
1326         del_pending_extents(trans, root->fs_info->extent_root, 0);
1327
1328         btrfs_free_path(path);
1329         return 0;
1330 }
1331
1332 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1333                          struct btrfs_root *root,
1334                          u64 bytenr, u64 num_bytes, u64 parent,
1335                          u64 ref_root, u64 ref_generation,
1336                          u64 owner_objectid)
1337 {
1338         int ret;
1339         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1340             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1341                 return 0;
1342         ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
1343                                      0, ref_root, 0, ref_generation,
1344                                      owner_objectid);
1345         return ret;
1346 }
1347
1348 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
1349                          struct btrfs_root *root)
1350 {
1351         u64 start;
1352         u64 end;
1353         int ret;
1354
1355         while(1) {
1356                 finish_current_insert(trans, root->fs_info->extent_root, 1);
1357                 del_pending_extents(trans, root->fs_info->extent_root, 1);
1358
1359                 /* is there more work to do? */
1360                 ret = find_first_extent_bit(&root->fs_info->pending_del,
1361                                             0, &start, &end, EXTENT_WRITEBACK);
1362                 if (!ret)
1363                         continue;
1364                 ret = find_first_extent_bit(&root->fs_info->extent_ins,
1365                                             0, &start, &end, EXTENT_WRITEBACK);
1366                 if (!ret)
1367                         continue;
1368                 break;
1369         }
1370         return 0;
1371 }
1372
1373 int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
1374                             struct btrfs_root *root, u64 bytenr,
1375                             u64 num_bytes, u32 *refs)
1376 {
1377         struct btrfs_path *path;
1378         int ret;
1379         struct btrfs_key key;
1380         struct extent_buffer *l;
1381         struct btrfs_extent_item *item;
1382
1383         WARN_ON(num_bytes < root->sectorsize);
1384         path = btrfs_alloc_path();
1385         path->reada = 1;
1386         key.objectid = bytenr;
1387         key.offset = num_bytes;
1388         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1389         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1390                                 0, 0);
1391         if (ret < 0)
1392                 goto out;
1393         if (ret != 0) {
1394                 btrfs_print_leaf(root, path->nodes[0]);
1395                 printk(KERN_INFO "btrfs failed to find block number %llu\n",
1396                        (unsigned long long)bytenr);
1397                 BUG();
1398         }
1399         l = path->nodes[0];
1400         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1401         *refs = btrfs_extent_refs(l, item);
1402 out:
1403         btrfs_free_path(path);
1404         return 0;
1405 }
1406
1407 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1408                           struct btrfs_root *root, u64 objectid, u64 bytenr)
1409 {
1410         struct btrfs_root *extent_root = root->fs_info->extent_root;
1411         struct btrfs_path *path;
1412         struct extent_buffer *leaf;
1413         struct btrfs_extent_ref *ref_item;
1414         struct btrfs_key key;
1415         struct btrfs_key found_key;
1416         u64 ref_root;
1417         u64 last_snapshot;
1418         u32 nritems;
1419         int ret;
1420
1421         key.objectid = bytenr;
1422         key.offset = (u64)-1;
1423         key.type = BTRFS_EXTENT_ITEM_KEY;
1424
1425         path = btrfs_alloc_path();
1426         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1427         if (ret < 0)
1428                 goto out;
1429         BUG_ON(ret == 0);
1430
1431         ret = -ENOENT;
1432         if (path->slots[0] == 0)
1433                 goto out;
1434
1435         path->slots[0]--;
1436         leaf = path->nodes[0];
1437         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1438
1439         if (found_key.objectid != bytenr ||
1440             found_key.type != BTRFS_EXTENT_ITEM_KEY)
1441                 goto out;
1442
1443         last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1444         while (1) {
1445                 leaf = path->nodes[0];
1446                 nritems = btrfs_header_nritems(leaf);
1447                 if (path->slots[0] >= nritems) {
1448                         ret = btrfs_next_leaf(extent_root, path);
1449                         if (ret < 0)
1450                                 goto out;
1451                         if (ret == 0)
1452                                 continue;
1453                         break;
1454                 }
1455                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1456                 if (found_key.objectid != bytenr)
1457                         break;
1458
1459                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
1460                         path->slots[0]++;
1461                         continue;
1462                 }
1463
1464                 ref_item = btrfs_item_ptr(leaf, path->slots[0],
1465                                           struct btrfs_extent_ref);
1466                 ref_root = btrfs_ref_root(leaf, ref_item);
1467                 if ((ref_root != root->root_key.objectid &&
1468                      ref_root != BTRFS_TREE_LOG_OBJECTID) ||
1469                      objectid != btrfs_ref_objectid(leaf, ref_item)) {
1470                         ret = 1;
1471                         goto out;
1472                 }
1473                 if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) {
1474                         ret = 1;
1475                         goto out;
1476                 }
1477
1478                 path->slots[0]++;
1479         }
1480         ret = 0;
1481 out:
1482         btrfs_free_path(path);
1483         return ret;
1484 }
1485
/*
 * Populate the leaf-ref cache for @buf after a COW.
 *
 * For a leaf (level 0) of a ref-counted root, allocate a btrfs_leaf_ref
 * sized for @nr_extents, record every non-inline file extent the leaf
 * points at, and add it to the cache.  For the reloc tree the ref is
 * shared and an existing cached entry is replaced.  Non-leaf buffers and
 * roots without ref_cows are a no-op.
 *
 * Returns 0 on success or -ENOMEM if the leaf ref can't be allocated.
 */
int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                    struct extent_buffer *buf, u32 nr_extents)
{
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        u64 root_gen;
        u32 nritems;
        int i;
        int level;
        int ret = 0;
        int shared = 0;

        if (!root->ref_cows)
                return 0;

        /* reloc-tree refs are shared and pinned to the previous transid */
        if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
                shared = 0;
                root_gen = root->root_key.offset;
        } else {
                shared = 1;
                root_gen = trans->transid - 1;
        }

        level = btrfs_header_level(buf);
        nritems = btrfs_header_nritems(buf);

        if (level == 0) {
                struct btrfs_leaf_ref *ref;
                struct btrfs_extent_info *info;

                ref = btrfs_alloc_leaf_ref(root, nr_extents);
                if (!ref) {
                        ret = -ENOMEM;
                        goto out;
                }

                ref->root_gen = root_gen;
                ref->bytenr = buf->start;
                ref->owner = btrfs_header_owner(buf);
                ref->generation = btrfs_header_generation(buf);
                ref->nritems = nr_extents;
                info = ref->extents;

                /* record each real (non-inline, non-hole) file extent */
                for (i = 0; nr_extents > 0 && i < nritems; i++) {
                        u64 disk_bytenr;
                        btrfs_item_key_to_cpu(buf, &key, i);
                        if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
                                continue;
                        fi = btrfs_item_ptr(buf, i,
                                            struct btrfs_file_extent_item);
                        if (btrfs_file_extent_type(buf, fi) ==
                            BTRFS_FILE_EXTENT_INLINE)
                                continue;
                        disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
                        if (disk_bytenr == 0)
                                continue;

                        info->bytenr = disk_bytenr;
                        info->num_bytes =
                                btrfs_file_extent_disk_num_bytes(buf, fi);
                        info->objectid = key.objectid;
                        info->offset = key.offset;
                        info++;
                }

                ret = btrfs_add_leaf_ref(root, ref, shared);
                if (ret == -EEXIST && shared) {
                        /* shared refs may collide: evict the stale entry
                         * and retry the insert */
                        struct btrfs_leaf_ref *old;
                        old = btrfs_lookup_leaf_ref(root, ref->bytenr);
                        BUG_ON(!old);
                        btrfs_remove_leaf_ref(root, old);
                        btrfs_free_leaf_ref(root, old);
                        ret = btrfs_add_leaf_ref(root, ref, shared);
                }
                WARN_ON(ret);
                btrfs_free_leaf_ref(root, ref);
        }
out:
        return ret;
}
1566
1567 /* when a block goes through cow, we update the reference counts of
1568  * everything that block points to.  The internal pointers of the block
1569  * can be in just about any order, and it is likely to have clusters of
1570  * things that are close together and clusters of things that are not.
1571  *
1572  * To help reduce the seeks that come with updating all of these reference
1573  * counts, sort them by byte number before actual updates are done.
1574  *
1575  * struct refsort is used to match byte number to slot in the btree block.
1576  * we sort based on the byte number and then use the slot to actually
1577  * find the item.
1578  *
1579  * struct refsort is smaller than strcut btrfs_item and smaller than
1580  * struct btrfs_key_ptr.  Since we're currently limited to the page size
1581  * for a btree block, there's no way for a kmalloc of refsorts for a
1582  * single node to be bigger than a page.
1583  */
1584 struct refsort {
1585         u64 bytenr;
1586         u32 slot;
1587 };
1588
1589 /*
1590  * for passing into sort()
1591  */
1592 static int refsort_cmp(const void *a_void, const void *b_void)
1593 {
1594         const struct refsort *a = a_void;
1595         const struct refsort *b = b_void;
1596
1597         if (a->bytenr < b->bytenr)
1598                 return -1;
1599         if (a->bytenr > b->bytenr)
1600                 return 1;
1601         return 0;
1602 }
1603
1604
/*
 * increment the reference counts of everything 'buf' points to, as part
 * of COWing 'buf' from 'orig_buf'.  For a leaf (level 0) that means the
 * data extents behind real file extent items; for a node it means the
 * child tree blocks.
 *
 * The byte numbers are gathered into 'sorted' in a first pass, sorted,
 * and only then processed, to cut down on extent tree seeks (see
 * struct refsort above).
 *
 * If nr_extents is non-NULL it is set to the number of file extents
 * seen (leaf) or the number of items (node).  Returns 0 on success or
 * the error from the ref update callback.
 */
noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *orig_buf,
			   struct extent_buffer *buf, u32 *nr_extents)
{
	u64 bytenr;
	u64 ref_root;
	u64 orig_root;
	u64 ref_generation;
	u64 orig_generation;
	struct refsort *sorted;
	u32 nritems;
	u32 nr_file_extents = 0;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int level;
	int ret = 0;
	int faili = 0;
	int refi = 0;
	int slot;
	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
			    u64, u64, u64, u64, u64, u64, u64, u64);

	ref_root = btrfs_header_owner(buf);
	ref_generation = btrfs_header_generation(buf);
	orig_root = btrfs_header_owner(orig_buf);
	orig_generation = btrfs_header_generation(orig_buf);

	nritems = btrfs_header_nritems(buf);
	level = btrfs_header_level(buf);

	/* NOTE(review): allocation failure is fatal here rather than
	 * returning -ENOMEM — confirm this is intentional */
	sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS);
	BUG_ON(!sorted);

	if (root->ref_cows) {
		process_func = __btrfs_inc_extent_ref;
	} else {
		/*
		 * for non-COW roots only tree log leaves and non-log node
		 * pointers get their refs updated; everything else is
		 * skipped (and still returns 0 via 'out')
		 */
		if (level == 0 &&
		    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
			goto out;
		if (level != 0 &&
		    root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
			goto out;
		process_func = __btrfs_update_extent_ref;
	}

	/*
	 * we make two passes through the items.  In the first pass we
	 * only record the byte number and slot.  Then we sort based on
	 * byte number and do the actual work based on the sorted results
	 */
	for (i = 0; i < nritems; i++) {
		cond_resched();
		if (level == 0) {
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			/* inline extents have no separate disk extent */
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			/* bytenr 0 marks a hole, nothing to reference */
			if (bytenr == 0)
				continue;

			nr_file_extents++;
			sorted[refi].bytenr = bytenr;
			sorted[refi].slot = i;
			refi++;
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			sorted[refi].bytenr = bytenr;
			sorted[refi].slot = i;
			refi++;
		}
	}
	/*
	 * if refi == 0, we didn't actually put anything into the sorted
	 * array and we're done
	 */
	if (refi == 0)
		goto out;

	sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);

	/* second pass: update the refs in sorted (seek-friendly) order */
	for (i = 0; i < refi; i++) {
		cond_resched();
		slot = sorted[i].slot;
		bytenr = sorted[i].bytenr;

		if (level == 0) {
			btrfs_item_key_to_cpu(buf, &key, slot);

			ret = process_func(trans, root, bytenr,
					   orig_buf->start, buf->start,
					   orig_root, ref_root,
					   orig_generation, ref_generation,
					   key.objectid);

			if (ret) {
				faili = slot;
				WARN_ON(1);
				goto fail;
			}
		} else {
			ret = process_func(trans, root, bytenr,
					   orig_buf->start, buf->start,
					   orig_root, ref_root,
					   orig_generation, ref_generation,
					   level - 1);
			if (ret) {
				faili = slot;
				WARN_ON(1);
				goto fail;
			}
		}
	}
out:
	kfree(sorted);
	/* tell the caller how many refs to expect in the leaf ref cache */
	if (nr_extents) {
		if (level == 0)
			*nr_extents = nr_file_extents;
		else
			*nr_extents = nritems;
	}
	return 0;
fail:
	kfree(sorted);
	WARN_ON(1);
	return ret;
}
1738
1739 int btrfs_update_ref(struct btrfs_trans_handle *trans,
1740                      struct btrfs_root *root, struct extent_buffer *orig_buf,
1741                      struct extent_buffer *buf, int start_slot, int nr)
1742
1743 {
1744         u64 bytenr;
1745         u64 ref_root;
1746         u64 orig_root;
1747         u64 ref_generation;
1748         u64 orig_generation;
1749         struct btrfs_key key;
1750         struct btrfs_file_extent_item *fi;
1751         int i;
1752         int ret;
1753         int slot;
1754         int level;
1755
1756         BUG_ON(start_slot < 0);
1757         BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
1758
1759         ref_root = btrfs_header_owner(buf);
1760         ref_generation = btrfs_header_generation(buf);
1761         orig_root = btrfs_header_owner(orig_buf);
1762         orig_generation = btrfs_header_generation(orig_buf);
1763         level = btrfs_header_level(buf);
1764
1765         if (!root->ref_cows) {
1766                 if (level == 0 &&
1767                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1768                         return 0;
1769                 if (level != 0 &&
1770                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1771                         return 0;
1772         }
1773
1774         for (i = 0, slot = start_slot; i < nr; i++, slot++) {
1775                 cond_resched();
1776                 if (level == 0) {
1777                         btrfs_item_key_to_cpu(buf, &key, slot);
1778                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1779                                 continue;
1780                         fi = btrfs_item_ptr(buf, slot,
1781                                             struct btrfs_file_extent_item);
1782                         if (btrfs_file_extent_type(buf, fi) ==
1783                             BTRFS_FILE_EXTENT_INLINE)
1784                                 continue;
1785                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1786                         if (bytenr == 0)
1787                                 continue;
1788                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1789                                             orig_buf->start, buf->start,
1790                                             orig_root, ref_root,
1791                                             orig_generation, ref_generation,
1792                                             key.objectid);
1793                         if (ret)
1794                                 goto fail;
1795                 } else {
1796                         bytenr = btrfs_node_blockptr(buf, slot);
1797                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1798                                             orig_buf->start, buf->start,
1799                                             orig_root, ref_root,
1800                                             orig_generation, ref_generation,
1801                                             level - 1);
1802                         if (ret)
1803                                 goto fail;
1804                 }
1805         }
1806         return 0;
1807 fail:
1808         WARN_ON(1);
1809         return -1;
1810 }
1811
/*
 * write the in-memory block group item for 'cache' back into the
 * extent tree.
 *
 * The item must already exist: a search result > 0 is a bug.  The
 * pending insert/delete processing runs even when the search failed,
 * but its error is only reported when the write itself succeeded.
 */
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	unsigned long bi;
	struct extent_buffer *leaf;

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	if (ret < 0)
		goto fail;
	BUG_ON(ret);

	/* copy the cached item over the one on disk and mark it dirty */
	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(extent_root, path);
fail:
	finish_current_insert(trans, extent_root, 0);
	pending_ret = del_pending_extents(trans, extent_root, 0);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;

}
1843
1844 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1845                                    struct btrfs_root *root)
1846 {
1847         struct btrfs_block_group_cache *cache, *entry;
1848         struct rb_node *n;
1849         int err = 0;
1850         int werr = 0;
1851         struct btrfs_path *path;
1852         u64 last = 0;
1853
1854         path = btrfs_alloc_path();
1855         if (!path)
1856                 return -ENOMEM;
1857
1858         while (1) {
1859                 cache = NULL;
1860                 spin_lock(&root->fs_info->block_group_cache_lock);
1861                 for (n = rb_first(&root->fs_info->block_group_cache_tree);
1862                      n; n = rb_next(n)) {
1863                         entry = rb_entry(n, struct btrfs_block_group_cache,
1864                                          cache_node);
1865                         if (entry->dirty) {
1866                                 cache = entry;
1867                                 break;
1868                         }
1869                 }
1870                 spin_unlock(&root->fs_info->block_group_cache_lock);
1871
1872                 if (!cache)
1873                         break;
1874
1875                 cache->dirty = 0;
1876                 last += cache->key.offset;
1877
1878                 err = write_one_cache_group(trans, root,
1879                                             path, cache);
1880                 /*
1881                  * if we fail to write the cache group, we want
1882                  * to keep it marked dirty in hopes that a later
1883                  * write will work
1884                  */
1885                 if (err) {
1886                         werr = err;
1887                         continue;
1888                 }
1889         }
1890         btrfs_free_path(path);
1891         return werr;
1892 }
1893
1894 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
1895 {
1896         struct btrfs_block_group_cache *block_group;
1897         int readonly = 0;
1898
1899         block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
1900         if (!block_group || block_group->ro)
1901                 readonly = 1;
1902         if (block_group)
1903                 put_block_group(block_group);
1904         return readonly;
1905 }
1906
1907 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1908                              u64 total_bytes, u64 bytes_used,
1909                              struct btrfs_space_info **space_info)
1910 {
1911         struct btrfs_space_info *found;
1912
1913         found = __find_space_info(info, flags);
1914         if (found) {
1915                 spin_lock(&found->lock);
1916                 found->total_bytes += total_bytes;
1917                 found->bytes_used += bytes_used;
1918                 found->full = 0;
1919                 spin_unlock(&found->lock);
1920                 *space_info = found;
1921                 return 0;
1922         }
1923         found = kzalloc(sizeof(*found), GFP_NOFS);
1924         if (!found)
1925                 return -ENOMEM;
1926
1927         INIT_LIST_HEAD(&found->block_groups);
1928         init_rwsem(&found->groups_sem);
1929         spin_lock_init(&found->lock);
1930         found->flags = flags;
1931         found->total_bytes = total_bytes;
1932         found->bytes_used = bytes_used;
1933         found->bytes_pinned = 0;
1934         found->bytes_reserved = 0;
1935         found->bytes_readonly = 0;
1936         found->bytes_delalloc = 0;
1937         found->full = 0;
1938         found->force_alloc = 0;
1939         *space_info = found;
1940         list_add_rcu(&found->list, &info->space_info);
1941         return 0;
1942 }
1943
1944 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1945 {
1946         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1947                                    BTRFS_BLOCK_GROUP_RAID1 |
1948                                    BTRFS_BLOCK_GROUP_RAID10 |
1949                                    BTRFS_BLOCK_GROUP_DUP);
1950         if (extra_flags) {
1951                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1952                         fs_info->avail_data_alloc_bits |= extra_flags;
1953                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1954                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1955                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1956                         fs_info->avail_system_alloc_bits |= extra_flags;
1957         }
1958 }
1959
/*
 * flip a block group to read only and count its unused bytes as
 * bytes_readonly in the owning space info.  Groups that are already ro
 * are left alone so the accounting is not applied twice.
 *
 * lock order: space_info->lock outside cache->lock, matching
 * update_block_group().
 */
static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
{
	spin_lock(&cache->space_info->lock);
	spin_lock(&cache->lock);
	if (!cache->ro) {
		cache->space_info->bytes_readonly += cache->key.offset -
					btrfs_block_group_used(&cache->item);
		cache->ro = 1;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&cache->space_info->lock);
}
1972
1973 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1974 {
1975         u64 num_devices = root->fs_info->fs_devices->rw_devices;
1976
1977         if (num_devices == 1)
1978                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1979         if (num_devices < 4)
1980                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1981
1982         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1983             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1984                       BTRFS_BLOCK_GROUP_RAID10))) {
1985                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1986         }
1987
1988         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1989             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1990                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1991         }
1992
1993         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1994             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1995              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1996              (flags & BTRFS_BLOCK_GROUP_DUP)))
1997                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1998         return flags;
1999 }
2000
2001 static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
2002 {
2003         struct btrfs_fs_info *info = root->fs_info;
2004         u64 alloc_profile;
2005
2006         if (data) {
2007                 alloc_profile = info->avail_data_alloc_bits &
2008                         info->data_alloc_profile;
2009                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2010         } else if (root == root->fs_info->chunk_root) {
2011                 alloc_profile = info->avail_system_alloc_bits &
2012                         info->system_alloc_profile;
2013                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2014         } else {
2015                 alloc_profile = info->avail_metadata_alloc_bits &
2016                         info->metadata_alloc_profile;
2017                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2018         }
2019
2020         return btrfs_reduce_alloc_profile(root, data);
2021 }
2022
2023 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2024 {
2025         u64 alloc_target;
2026
2027         alloc_target = btrfs_get_alloc_profile(root, 1);
2028         BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
2029                                                        alloc_target);
2030 }
2031
/*
 * for now this just makes sure we have at least 5% of our metadata space free
 * for use.
 *
 * Recovery order when over the threshold: first force-allocate a new
 * metadata chunk, then (once) commit the transaction, then give up with
 * -ENOSPC.
 */
int btrfs_check_metadata_free_space(struct btrfs_root *root)
{
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_space_info *meta_sinfo;
	u64 alloc_target, thresh;
	int committed = 0, ret;

	/* get the space info for where the metadata will live */
	alloc_target = btrfs_get_alloc_profile(root, 0);
	meta_sinfo = __find_space_info(info, alloc_target);

again:
	spin_lock(&meta_sinfo->lock);
	/* allow 80% usage normally, 95% once the space info is full */
	if (!meta_sinfo->full)
		thresh = meta_sinfo->total_bytes * 80;
	else
		thresh = meta_sinfo->total_bytes * 95;

	do_div(thresh, 100);

	if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
	    meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
		struct btrfs_trans_handle *trans;
		if (!meta_sinfo->full) {
			/*
			 * first attempt: force a chunk allocation and
			 * re-check.  The do_chunk_alloc result is ignored
			 * on purpose: a failed allocation marks the space
			 * info full, so the retry falls through to the
			 * commit/ENOSPC path instead of looping here.
			 */
			meta_sinfo->force_alloc = 1;
			spin_unlock(&meta_sinfo->lock);

			trans = btrfs_start_transaction(root, 1);
			if (!trans)
				return -ENOMEM;

			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
					     2 * 1024 * 1024, alloc_target, 0);
			btrfs_end_transaction(trans, root);
			goto again;
		}
		spin_unlock(&meta_sinfo->lock);

		/* second attempt: commit once to release pinned bytes */
		if (!committed) {
			committed = 1;
			trans = btrfs_join_transaction(root, 1);
			if (!trans)
				return -ENOMEM;
			ret = btrfs_commit_transaction(trans, root);
			if (ret)
				return ret;
			goto again;
		}
		return -ENOSPC;
	}
	spin_unlock(&meta_sinfo->lock);

	return 0;
}
2090
2091 /*
2092  * This will check the space that the inode allocates from to make sure we have
2093  * enough space for bytes.
2094  */
2095 int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
2096                                 u64 bytes)
2097 {
2098         struct btrfs_space_info *data_sinfo;
2099         int ret = 0, committed = 0;
2100
2101         /* make sure bytes are sectorsize aligned */
2102         bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2103
2104         data_sinfo = BTRFS_I(inode)->space_info;
2105 again:
2106         /* make sure we have enough space to handle the data first */
2107         spin_lock(&data_sinfo->lock);
2108         if (data_sinfo->total_bytes - data_sinfo->bytes_used -
2109             data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
2110             data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
2111             data_sinfo->bytes_may_use < bytes) {
2112                 struct btrfs_trans_handle *trans;
2113
2114                 /*
2115                  * if we don't have enough free bytes in this space then we need
2116                  * to alloc a new chunk.
2117                  */
2118                 if (!data_sinfo->full) {
2119                         u64 alloc_target;
2120
2121                         data_sinfo->force_alloc = 1;
2122                         spin_unlock(&data_sinfo->lock);
2123
2124                         alloc_target = btrfs_get_alloc_profile(root, 1);
2125                         trans = btrfs_start_transaction(root, 1);
2126                         if (!trans)
2127                                 return -ENOMEM;
2128
2129                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2130                                              bytes + 2 * 1024 * 1024,
2131                                              alloc_target, 0);
2132                         btrfs_end_transaction(trans, root);
2133                         if (ret)
2134                                 return ret;
2135                         goto again;
2136                 }
2137                 spin_unlock(&data_sinfo->lock);
2138
2139                 /* commit the current transaction and try again */
2140                 if (!committed) {
2141                         committed = 1;
2142                         trans = btrfs_join_transaction(root, 1);
2143                         if (!trans)
2144                                 return -ENOMEM;
2145                         ret = btrfs_commit_transaction(trans, root);
2146                         if (ret)
2147                                 return ret;
2148                         goto again;
2149                 }
2150
2151                 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
2152                        ", %llu bytes_used, %llu bytes_reserved, "
2153                        "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
2154                        "%llu total\n", bytes, data_sinfo->bytes_delalloc,
2155                        data_sinfo->bytes_used, data_sinfo->bytes_reserved,
2156                        data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
2157                        data_sinfo->bytes_may_use, data_sinfo->total_bytes);
2158                 return -ENOSPC;
2159         }
2160         data_sinfo->bytes_may_use += bytes;
2161         BTRFS_I(inode)->reserved_bytes += bytes;
2162         spin_unlock(&data_sinfo->lock);
2163
2164         return btrfs_check_metadata_free_space(root);
2165 }
2166
2167 /*
2168  * if there was an error for whatever reason after calling
2169  * btrfs_check_data_free_space, call this so we can cleanup the counters.
2170  */
2171 void btrfs_free_reserved_data_space(struct btrfs_root *root,
2172                                     struct inode *inode, u64 bytes)
2173 {
2174         struct btrfs_space_info *data_sinfo;
2175
2176         /* make sure bytes are sectorsize aligned */
2177         bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2178
2179         data_sinfo = BTRFS_I(inode)->space_info;
2180         spin_lock(&data_sinfo->lock);
2181         data_sinfo->bytes_may_use -= bytes;
2182         BTRFS_I(inode)->reserved_bytes -= bytes;
2183         spin_unlock(&data_sinfo->lock);
2184 }
2185
2186 /* called when we are adding a delalloc extent to the inode's io_tree */
2187 void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
2188                                   u64 bytes)
2189 {
2190         struct btrfs_space_info *data_sinfo;
2191
2192         /* get the space info for where this inode will be storing its data */
2193         data_sinfo = BTRFS_I(inode)->space_info;
2194
2195         /* make sure we have enough space to handle the data first */
2196         spin_lock(&data_sinfo->lock);
2197         data_sinfo->bytes_delalloc += bytes;
2198
2199         /*
2200          * we are adding a delalloc extent without calling
2201          * btrfs_check_data_free_space first.  This happens on a weird
2202          * writepage condition, but shouldn't hurt our accounting
2203          */
2204         if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
2205                 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
2206                 BTRFS_I(inode)->reserved_bytes = 0;
2207         } else {
2208                 data_sinfo->bytes_may_use -= bytes;
2209                 BTRFS_I(inode)->reserved_bytes -= bytes;
2210         }
2211
2212         spin_unlock(&data_sinfo->lock);
2213 }
2214
2215 /* called when we are clearing an delalloc extent from the inode's io_tree */
2216 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
2217                               u64 bytes)
2218 {
2219         struct btrfs_space_info *info;
2220
2221         info = BTRFS_I(inode)->space_info;
2222
2223         spin_lock(&info->lock);
2224         info->bytes_delalloc -= bytes;
2225         spin_unlock(&info->lock);
2226 }
2227
/*
 * allocate a new chunk for 'flags' when needed.
 *
 * A chunk is allocated when 'force' is set, when the space info has a
 * force_alloc request pending, or when used+pinned+reserved+alloc_bytes
 * crosses ~60% of the writable bytes.  Serialized by chunk_mutex.
 */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force)
{
	struct btrfs_space_info *space_info;
	u64 thresh;
	int ret = 0;

	mutex_lock(&extent_root->fs_info->chunk_mutex);

	flags = btrfs_reduce_alloc_profile(extent_root, flags);

	/* create the space info on first use of this profile */
	space_info = __find_space_info(extent_root->fs_info, flags);
	if (!space_info) {
		ret = update_space_info(extent_root->fs_info, flags,
					0, 0, &space_info);
		BUG_ON(ret);
	}
	BUG_ON(!space_info);

	spin_lock(&space_info->lock);
	/* consume a pending force_alloc request */
	if (space_info->force_alloc) {
		force = 1;
		space_info->force_alloc = 0;
	}
	if (space_info->full) {
		spin_unlock(&space_info->lock);
		goto out;
	}

	/* div_factor(x, 6) is roughly 60% of the non-readonly bytes */
	thresh = space_info->total_bytes - space_info->bytes_readonly;
	thresh = div_factor(thresh, 6);
	if (!force &&
	   (space_info->bytes_used + space_info->bytes_pinned +
	    space_info->bytes_reserved + alloc_bytes) < thresh) {
		spin_unlock(&space_info->lock);
		goto out;
	}
	spin_unlock(&space_info->lock);

	/*
	 * a failed allocation marks the space info full so callers stop
	 * trying.  NOTE(review): ->full is stored here without
	 * space_info->lock while it is read under the lock above —
	 * confirm the unlocked store is intentional.
	 */
	ret = btrfs_alloc_chunk(trans, extent_root, flags);
	if (ret)
		space_info->full = 1;
out:
	mutex_unlock(&extent_root->fs_info->chunk_mutex);
	return ret;
}
2275
/*
 * update the in-memory block group items and space info counters for
 * the range [bytenr, bytenr + num_bytes).
 *
 * 'alloc' selects allocation vs free.  When freeing with 'mark_free'
 * set, the range is also discarded and handed back to the per-group
 * free space cache.  Each touched group is marked dirty so its item is
 * written out later by btrfs_write_dirty_block_groups().
 *
 * returns 0 on success, -1 if some part of the range is not covered by
 * any cached block group.
 */
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;

	/* one pass per block group the range overlaps */
	while (total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache)
			return -1;
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);

		/* lock order: space_info->lock outside cache->lock */
		spin_lock(&cache->space_info->lock);
		spin_lock(&cache->lock);
		cache->dirty = 1;
		old_val = btrfs_block_group_used(&cache->item);
		/* clamp to the part of the range inside this group */
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			old_val += num_bytes;
			cache->space_info->bytes_used += num_bytes;
			/* allocating from a ro group consumes readonly bytes */
			if (cache->ro)
				cache->space_info->bytes_readonly -= num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
		} else {
			old_val -= num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			if (cache->ro)
				cache->space_info->bytes_readonly += num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
			if (mark_free) {
				int ret;

				/* best effort: failures here only warn */
				ret = btrfs_discard_extent(root, bytenr,
							   num_bytes);
				WARN_ON(ret);

				ret = btrfs_add_free_space(cache, bytenr,
							   num_bytes);
				WARN_ON(ret);
			}
		}
		put_block_group(cache);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}
2333
2334 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
2335 {
2336         struct btrfs_block_group_cache *cache;
2337         u64 bytenr;
2338
2339         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
2340         if (!cache)
2341                 return 0;
2342
2343         bytenr = cache->key.objectid;
2344         put_block_group(cache);
2345
2346         return bytenr;
2347 }
2348
2349 int btrfs_update_pinned_extents(struct btrfs_root *root,
2350                                 u64 bytenr, u64 num, int pin)
2351 {
2352         u64 len;
2353         struct btrfs_block_group_cache *cache;
2354         struct btrfs_fs_info *fs_info = root->fs_info;
2355
2356         WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex));
2357         if (pin) {
2358                 set_extent_dirty(&fs_info->pinned_extents,
2359                                 bytenr, bytenr + num - 1, GFP_NOFS);
2360         } else {
2361                 clear_extent_dirty(&fs_info->pinned_extents,
2362                                 bytenr, bytenr + num - 1, GFP_NOFS);
2363         }
2364         while (num > 0) {
2365                 cache = btrfs_lookup_block_group(fs_info, bytenr);
2366                 BUG_ON(!cache);
2367                 len = min(num, cache->key.offset -
2368                           (bytenr - cache->key.objectid));
2369                 if (pin) {
2370                         spin_lock(&cache->space_info->lock);
2371                         spin_lock(&cache->lock);
2372                         cache->pinned += len;
2373                         cache->space_info->bytes_pinned += len;
2374                         spin_unlock(&cache->lock);
2375                         spin_unlock(&cache->space_info->lock);
2376                         fs_info->total_pinned += len;
2377                 } else {
2378                         spin_lock(&cache->space_info->lock);
2379                         spin_lock(&cache->lock);
2380                         cache->pinned -= len;
2381                         cache->space_info->bytes_pinned -= len;
2382                         spin_unlock(&cache->lock);
2383                         spin_unlock(&cache->space_info->lock);
2384                         fs_info->total_pinned -= len;
2385                         if (cache->cached)
2386                                 btrfs_add_free_space(cache, bytenr, len);
2387                 }
2388                 put_block_group(cache);
2389                 bytenr += len;
2390                 num -= len;
2391         }
2392         return 0;
2393 }
2394
2395 static int update_reserved_extents(struct btrfs_root *root,
2396                                    u64 bytenr, u64 num, int reserve)
2397 {
2398         u64 len;
2399         struct btrfs_block_group_cache *cache;
2400         struct btrfs_fs_info *fs_info = root->fs_info;
2401
2402         while (num > 0) {
2403                 cache = btrfs_lookup_block_group(fs_info, bytenr);
2404                 BUG_ON(!cache);
2405                 len = min(num, cache->key.offset -
2406                           (bytenr - cache->key.objectid));
2407
2408                 spin_lock(&cache->space_info->lock);
2409                 spin_lock(&cache->lock);
2410                 if (reserve) {
2411                         cache->reserved += len;
2412                         cache->space_info->bytes_reserved += len;
2413                 } else {
2414                         cache->reserved -= len;
2415                         cache->space_info->bytes_reserved -= len;
2416                 }
2417                 spin_unlock(&cache->lock);
2418                 spin_unlock(&cache->space_info->lock);
2419                 put_block_group(cache);
2420                 bytenr += len;
2421                 num -= len;
2422         }
2423         return 0;
2424 }
2425
2426 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
2427 {
2428         u64 last = 0;
2429         u64 start;
2430         u64 end;
2431         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
2432         int ret;
2433
2434         mutex_lock(&root->fs_info->pinned_mutex);
2435         while (1) {
2436                 ret = find_first_extent_bit(pinned_extents, last,
2437                                             &start, &end, EXTENT_DIRTY);
2438                 if (ret)
2439                         break;
2440                 set_extent_dirty(copy, start, end, GFP_NOFS);
2441                 last = end + 1;
2442         }
2443         mutex_unlock(&root->fs_info->pinned_mutex);
2444         return 0;
2445 }
2446
/*
 * Run at transaction commit time: drain the 'unpin' tree (filled by
 * btrfs_copy_pinned), discarding each range on disk and dropping its
 * pinned accounting via btrfs_update_pinned_extents(..., 0), which also
 * returns the bytes to the free space cache for cached block groups.
 *
 * NOTE(review): on normal completion this returns the non-zero value
 * find_first_extent_bit() gave when the tree ran dry, not 0 — callers
 * appear to treat the return as advisory; confirm before relying on it.
 */
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct extent_io_tree *unpin)
{
	u64 start;
	u64 end;
	int ret;

	mutex_lock(&root->fs_info->pinned_mutex);
	while (1) {
		/* always restart from 0: each pass clears what it found */
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		/* let the device reclaim the range (trim/discard) */
		ret = btrfs_discard_extent(root, start, end + 1 - start);

		btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
		clear_extent_dirty(unpin, start, end, GFP_NOFS);

		/* drop the mutex while rescheduling so others can make progress */
		if (need_resched()) {
			mutex_unlock(&root->fs_info->pinned_mutex);
			cond_resched();
			mutex_lock(&root->fs_info->pinned_mutex);
		}
	}
	mutex_unlock(&root->fs_info->pinned_mutex);
	return ret;
}
2476
/*
 * Process the pending extent operations queued in fs_info->extent_ins:
 * gather PENDING_EXTENT_INSERT ops onto insert_list and
 * PENDING_BACKREF_UPDATE ops onto update_list, then apply them via
 * insert_extents() and update_backrefs().  Ops whose ->del flag is set
 * are unwound here instead (unlocked, pinned down and un-accounted).
 *
 * 'all' requests a full drain: whenever work might have generated new
 * pending ops (COW during updates/inserts) or ranges were skipped
 * because they were locked, the scan restarts from 0 until clean.
 */
static int finish_current_insert(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root, int all)
{
	u64 start;
	u64 end;
	u64 priv;
	u64 search = 0;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct btrfs_path *path;
	struct pending_extent_op *extent_op, *tmp;
	struct list_head insert_list, update_list;
	int ret;
	int num_inserts = 0, max_inserts, restart = 0;

	path = btrfs_alloc_path();
	INIT_LIST_HEAD(&insert_list);
	INIT_LIST_HEAD(&update_list);

	/*
	 * cap each batch at roughly what fits in one leaf: every insert
	 * costs an extent item plus a backref item (keys + item headers)
	 */
	max_inserts = extent_root->leafsize /
		(2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) +
		 sizeof(struct btrfs_extent_ref) +
		 sizeof(struct btrfs_extent_item));
again:
	mutex_lock(&info->extent_ins_mutex);
	while (1) {
		ret = find_first_extent_bit(&info->extent_ins, search, &start,
					    &end, EXTENT_WRITEBACK);
		if (ret) {
			/* nothing found; rescan from 0 if we skipped ranges */
			if (restart && !num_inserts &&
			    list_empty(&update_list)) {
				restart = 0;
				search = 0;
				continue;
			}
			break;
		}

		/* somebody else holds this range locked: skip it for now */
		ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
		if (!ret) {
			if (all)
				restart = 1;
			search = end + 1;
			if (need_resched()) {
				mutex_unlock(&info->extent_ins_mutex);
				cond_resched();
				mutex_lock(&info->extent_ins_mutex);
			}
			continue;
		}

		/* the pending op struct is stashed as state private data */
		ret = get_state_private(&info->extent_ins, start, &priv);
		BUG_ON(ret);
		extent_op = (struct pending_extent_op *)(unsigned long) priv;

		if (extent_op->type == PENDING_EXTENT_INSERT) {
			num_inserts++;
			list_add_tail(&extent_op->list, &insert_list);
			search = end + 1;
			if (num_inserts == max_inserts) {
				restart = 1;
				break;
			}
		} else if (extent_op->type == PENDING_BACKREF_UPDATE) {
			list_add_tail(&extent_op->list, &update_list);
			search = end + 1;
		} else {
			BUG();
		}
	}

	/*
	 * process the update list, clear the writeback bit for it, and if
	 * somebody marked this thing for deletion then just unlock it and be
	 * done, the free_extents will handle it
	 */
	list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
		clear_extent_bits(&info->extent_ins, extent_op->bytenr,
				  extent_op->bytenr + extent_op->num_bytes - 1,
				  EXTENT_WRITEBACK, GFP_NOFS);
		if (extent_op->del) {
			list_del_init(&extent_op->list);
			unlock_extent(&info->extent_ins, extent_op->bytenr,
				      extent_op->bytenr + extent_op->num_bytes
				      - 1, GFP_NOFS);
			kfree(extent_op);
		}
	}
	mutex_unlock(&info->extent_ins_mutex);

	/*
	 * still have things left on the update list, go ahead an update
	 * everything
	 */
	if (!list_empty(&update_list)) {
		ret = update_backrefs(trans, extent_root, path, &update_list);
		BUG_ON(ret);

		/* we may have COW'ed new blocks, so lets start over */
		if (all)
			restart = 1;
	}

	/*
	 * if no inserts need to be done, but we skipped some extents and we
	 * need to make sure everything is cleaned then reset everything and
	 * go back to the beginning
	 */
	if (!num_inserts && restart) {
		search = 0;
		restart = 0;
		INIT_LIST_HEAD(&update_list);
		INIT_LIST_HEAD(&insert_list);
		goto again;
	} else if (!num_inserts) {
		goto out;
	}

	/*
	 * process the insert extents list.  Again if we are deleting this
	 * extent, then just unlock it, pin down the bytes if need be, and be
	 * done with it.  Saves us from having to actually insert the extent
	 * into the tree and then subsequently come along and delete it
	 */
	mutex_lock(&info->extent_ins_mutex);
	list_for_each_entry_safe(extent_op, tmp, &insert_list, list) {
		clear_extent_bits(&info->extent_ins, extent_op->bytenr,
				  extent_op->bytenr + extent_op->num_bytes - 1,
				  EXTENT_WRITEBACK, GFP_NOFS);
		if (extent_op->del) {
			u64 used;
			list_del_init(&extent_op->list);
			unlock_extent(&info->extent_ins, extent_op->bytenr,
				      extent_op->bytenr + extent_op->num_bytes
				      - 1, GFP_NOFS);

			mutex_lock(&extent_root->fs_info->pinned_mutex);
			ret = pin_down_bytes(trans, extent_root,
					     extent_op->bytenr,
					     extent_op->num_bytes, 0);
			mutex_unlock(&extent_root->fs_info->pinned_mutex);

			/* undo the bytes-used accounting for the never-inserted extent */
			spin_lock(&info->delalloc_lock);
			used = btrfs_super_bytes_used(&info->super_copy);
			btrfs_set_super_bytes_used(&info->super_copy,
					used - extent_op->num_bytes);
			used = btrfs_root_used(&extent_root->root_item);
			btrfs_set_root_used(&extent_root->root_item,
					used - extent_op->num_bytes);
			spin_unlock(&info->delalloc_lock);

			ret = update_block_group(trans, extent_root,
						 extent_op->bytenr,
						 extent_op->num_bytes,
						 0, ret > 0);
			BUG_ON(ret);
			kfree(extent_op);
			num_inserts--;
		}
	}
	mutex_unlock(&info->extent_ins_mutex);

	ret = insert_extents(trans, extent_root, path, &insert_list,
			     num_inserts);
	BUG_ON(ret);

	/*
	 * if restart is set for whatever reason we need to go back and start
	 * searching through the pending list again.
	 *
	 * We just inserted some extents, which could have resulted in new
	 * blocks being allocated, which would result in new blocks needing
	 * updates, so if all is set we _must_ restart to get the updated
	 * blocks.
	 */
	if (restart || all) {
		INIT_LIST_HEAD(&insert_list);
		INIT_LIST_HEAD(&update_list);
		search = 0;
		restart = 0;
		num_inserts = 0;
		goto again;
	}
out:
	btrfs_free_path(path);
	return 0;
}
2663
2664 static int pin_down_bytes(struct btrfs_trans_handle *trans,
2665                           struct btrfs_root *root,
2666                           u64 bytenr, u64 num_bytes, int is_data)
2667 {
2668         int err = 0;
2669         struct extent_buffer *buf;
2670
2671         if (is_data)
2672                 goto pinit;
2673
2674         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
2675         if (!buf)
2676                 goto pinit;
2677
2678         /* we can reuse a block if it hasn't been written
2679          * and it is from this transaction.  We can't
2680          * reuse anything from the tree log root because
2681          * it has tiny sub-transactions.
2682          */
2683         if (btrfs_buffer_uptodate(buf, 0) &&
2684             btrfs_try_tree_lock(buf)) {
2685                 u64 header_owner = btrfs_header_owner(buf);
2686                 u64 header_transid = btrfs_header_generation(buf);
2687                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2688                     header_owner != BTRFS_TREE_RELOC_OBJECTID &&
2689                     header_transid == trans->transid &&
2690                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2691                         clean_tree_block(NULL, root, buf);
2692                         btrfs_tree_unlock(buf);
2693                         free_extent_buffer(buf);
2694                         return 1;
2695                 }
2696                 btrfs_tree_unlock(buf);
2697         }
2698         free_extent_buffer(buf);
2699 pinit:
2700         btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2701
2702         BUG_ON(err < 0);
2703         return 0;
2704 }
2705
2706 /*
2707  * remove an extent from the root, returns 0 on success
2708  */
2709 static int __free_extent(struct btrfs_trans_handle *trans,
2710                          struct btrfs_root *root,
2711                          u64 bytenr, u64 num_bytes, u64 parent,
2712                          u64 root_objectid, u64 ref_generation,
2713                          u64 owner_objectid, int pin, int mark_free)
2714 {
2715         struct btrfs_path *path;
2716         struct btrfs_key key;
2717         struct btrfs_fs_info *info = root->fs_info;
2718         struct btrfs_root *extent_root = info->extent_root;
2719         struct extent_buffer *leaf;
2720         int ret;
2721         int extent_slot = 0;
2722         int found_extent = 0;
2723         int num_to_del = 1;
2724         struct btrfs_extent_item *ei;
2725         u32 refs;
2726
2727         key.objectid = bytenr;
2728         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
2729         key.offset = num_bytes;
2730         path = btrfs_alloc_path();
2731         if (!path)
2732                 return -ENOMEM;
2733
2734         path->reada = 1;
2735         ret = lookup_extent_backref(trans, extent_root, path,
2736                                     bytenr, parent, root_objectid,
2737                                     ref_generation, owner_objectid, 1);
2738         if (ret == 0) {
2739                 struct btrfs_key found_key;
2740                 extent_slot = path->slots[0];
2741                 while (extent_slot > 0) {
2742                         extent_slot--;
2743                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2744                                               extent_slot);
2745                         if (found_key.objectid != bytenr)
2746                                 break;
2747                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
2748                             found_key.offset == num_bytes) {
2749                                 found_extent = 1;
2750                                 break;
2751                         }
2752                         if (path->slots[0] - extent_slot > 5)
2753                                 break;
2754                 }
2755                 if (!found_extent) {
2756                         ret = remove_extent_backref(trans, extent_root, path);
2757                         BUG_ON(ret);
2758                         btrfs_release_path(extent_root, path);
2759                         ret = btrfs_search_slot(trans, extent_root,
2760                                                 &key, path, -1, 1);
2761                         if (ret) {
2762                                 printk(KERN_ERR "umm, got %d back from search"
2763                                        ", was looking for %llu\n", ret,
2764                                        (unsigned long long)bytenr);
2765                                 btrfs_print_leaf(extent_root, path->nodes[0]);
2766                         }
2767                         BUG_ON(ret);
2768                         extent_slot = path->slots[0];
2769                 }
2770         } else {
2771                 btrfs_print_leaf(extent_root, path->nodes[0]);
2772                 WARN_ON(1);
2773                 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
2774                        "root %llu gen %llu owner %llu\n",
2775                        (unsigned long long)bytenr,
2776                        (unsigned long long)root_objectid,
2777                        (unsigned long long)ref_generation,
2778                        (unsigned long long)owner_objectid);
2779         }
2780
2781         leaf = path->nodes[0];
2782         ei = btrfs_item_ptr(leaf, extent_slot,
2783                             struct btrfs_extent_item);
2784         refs = btrfs_extent_refs(leaf, ei);
2785         BUG_ON(refs == 0);
2786         refs -= 1;
2787         btrfs_set_extent_refs(leaf, ei, refs);
2788
2789         btrfs_mark_buffer_dirty(leaf);
2790
2791         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
2792                 struct btrfs_extent_ref *ref;
2793                 ref = btrfs_item_ptr(leaf, path->slots[0],
2794                                      struct btrfs_extent_ref);
2795                 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
2796                 /* if the back ref and the extent are next to each other
2797                  * they get deleted below in one shot
2798                  */
2799                 path->slots[0] = extent_slot;
2800                 num_to_del = 2;
2801         } else if (found_extent) {
2802                 /* otherwise delete the extent back ref */
2803                 ret = remove_extent_backref(trans, extent_root, path);
2804                 BUG_ON(ret);
2805                 /* if refs are 0, we need to setup the path for deletion */
2806                 if (refs == 0) {
2807                         btrfs_release_path(extent_root, path);
2808                         ret = btrfs_search_slot(trans, extent_root, &key, path,
2809                                                 -1, 1);
2810                         BUG_ON(ret);
2811                 }
2812         }
2813
2814         if (refs == 0) {
2815                 u64 super_used;
2816                 u64 root_used;
2817
2818                 if (pin) {
2819                         mutex_lock(&root->fs_info->pinned_mutex);
2820                         ret = pin_down_bytes(trans, root, bytenr, num_bytes,
2821                                 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
2822                         mutex_unlock(&root->fs_info->pinned_mutex);
2823                         if (ret > 0)
2824                                 mark_free = 1;
2825                         BUG_ON(ret < 0);
2826                 }
2827                 /* block accounting for super block */
2828                 spin_lock(&info->delalloc_lock);
2829                 super_used = btrfs_super_bytes_used(&info->super_copy);
2830                 btrfs_set_super_bytes_used(&info->super_copy,
2831                                            super_used - num_bytes);
2832
2833                 /* block accounting for root item */
2834                 root_used = btrfs_root_used(&root->root_item);
2835                 btrfs_set_root_used(&root->root_item,
2836                                            root_used - num_bytes);
2837                 spin_unlock(&info->delalloc_lock);
2838                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
2839                                       num_to_del);
2840                 BUG_ON(ret);
2841                 btrfs_release_path(extent_root, path);
2842
2843                 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2844                         ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
2845                         BUG_ON(ret);
2846                 }
2847
2848                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
2849                                          mark_free);
2850                 BUG_ON(ret);
2851         }
2852         btrfs_free_path(path);
2853         finish_current_insert(trans, extent_root, 0);
2854         return ret;
2855 }
2856
2857 /*
2858  * find all the blocks marked as pending in the radix tree and remove
2859  * them from the extent map
2860  */
2861 static int del_pending_extents(struct btrfs_trans_handle *trans,
2862                                struct btrfs_root *extent_root, int all)
2863 {
2864         int ret;
2865         int err = 0;
2866         u64 start;
2867         u64 end;
2868         u64 priv;
2869         u64 search = 0;
2870         int nr = 0, skipped = 0;
2871         struct extent_io_tree *pending_del;
2872         struct extent_io_tree *extent_ins;
2873         struct pending_extent_op *extent_op;
2874         struct btrfs_fs_info *info = extent_root->fs_info;
2875         struct list_head delete_list;
2876
2877         INIT_LIST_HEAD(&delete_list);
2878         extent_ins = &extent_root->fs_info->extent_ins;
2879         pending_del = &extent_root->fs_info->pending_del;
2880
2881 again:
2882         mutex_lock(&info->extent_ins_mutex);
2883         while (1) {
2884                 ret = find_first_extent_bit(pending_del, search, &start, &end,
2885                                             EXTENT_WRITEBACK);
2886                 if (ret) {
2887                         if (all && skipped && !nr) {
2888                                 search = 0;
2889                                 skipped = 0;
2890                                 continue;
2891                         }
2892                         mutex_unlock(&info->extent_ins_mutex);
2893                         break;
2894                 }
2895
2896                 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS);
2897                 if (!ret) {
2898                         search = end+1;
2899                         skipped = 1;
2900
2901                         if (need_resched()) {
2902                                 mutex_unlock(&info->extent_ins_mutex);
2903                                 cond_resched();
2904                                 mutex_lock(&info->extent_ins_mutex);
2905                         }
2906
2907                         continue;
2908                 }
2909                 BUG_ON(ret < 0);
2910
2911                 ret = get_state_private(pending_del, start, &priv);
2912                 BUG_ON(ret);
2913                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2914
2915                 clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK,
2916                                   GFP_NOFS);
2917                 if (!test_range_bit(extent_ins, start, end,
2918                                     EXTENT_WRITEBACK, 0)) {
2919                         list_add_tail(&extent_op->list, &delete_list);
2920                         nr++;
2921                 } else {
2922                         kfree(extent_op);
2923
2924                         ret = get_state_private(&info->extent_ins, start,
2925                                                 &priv);
2926                         BUG_ON(ret);
2927                         extent_op = (struct pending_extent_op *)
2928                                                 (unsigned long)priv;
2929
2930                         clear_extent_bits(&info->extent_ins, start, end,
2931                                           EXTENT_WRITEBACK, GFP_NOFS);
2932
2933                         if (extent_op->type == PENDING_BACKREF_UPDATE) {
2934                                 list_add_tail(&extent_op->list, &delete_list);
2935                                 search = end + 1;
2936                                 nr++;
2937                                 continue;
2938                         }
2939
2940                         mutex_lock(&extent_root->fs_info->pinned_mutex);
2941                         ret = pin_down_bytes(trans, extent_root, start,
2942                                              end + 1 - start, 0);
2943                         mutex_unlock(&extent_root->fs_info->pinned_mutex);
2944
2945                         ret = update_block_group(trans, extent_root, start,
2946                                                 end + 1 - start, 0, ret > 0);
2947
2948                         unlock_extent(extent_ins, start, end, GFP_NOFS);
2949                         BUG_ON(ret);
2950                         kfree(extent_op);
2951                 }
2952                 if (ret)
2953                         err = ret;
2954
2955                 search = end + 1;
2956
2957                 if (need_resched()) {
2958                         mutex_unlock(&info->extent_ins_mutex);
2959                         cond_resched();
2960                         mutex_lock(&info->extent_ins_mutex);
2961                 }
2962         }
2963
2964         if (nr) {
2965                 ret = free_extents(trans, extent_root, &delete_list);
2966                 BUG_ON(ret);
2967         }
2968
2969         if (all && skipped) {
2970                 INIT_LIST_HEAD(&delete_list);
2971                 search = 0;
2972                 nr = 0;
2973                 goto again;
2974         }
2975
2976         if (!err)
2977                 finish_current_insert(trans, extent_root, 0);
2978         return err;
2979 }
2980
2981 /*
2982  * remove an extent from the root, returns 0 on success
2983  */
2984 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2985                                struct btrfs_root *root,
2986                                u64 bytenr, u64 num_bytes, u64 parent,
2987                                u64 root_objectid, u64 ref_generation,
2988                                u64 owner_objectid, int pin)
2989 {
2990         struct btrfs_root *extent_root = root->fs_info->extent_root;
2991         int pending_ret;
2992         int ret;
2993
2994         WARN_ON(num_bytes < root->sectorsize);
2995         if (root == extent_root) {
2996                 struct pending_extent_op *extent_op = NULL;
2997
2998                 mutex_lock(&root->fs_info->extent_ins_mutex);
2999                 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
3000                                 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
3001                         u64 priv;
3002                         ret = get_state_private(&root->fs_info->extent_ins,
3003                                                 bytenr, &priv);
3004                         BUG_ON(ret);
3005                         extent_op = (struct pending_extent_op *)
3006                                                 (unsigned long)priv;
3007
3008                         extent_op->del = 1;
3009                         if (extent_op->type == PENDING_EXTENT_INSERT) {
3010                                 mutex_unlock(&root->fs_info->extent_ins_mutex);
3011                                 return 0;
3012                         }
3013                 }
3014
3015                 if (extent_op) {
3016                         ref_generation = extent_op->orig_generation;
3017                         parent = extent_op->orig_parent;
3018                 }
3019
3020                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
3021                 BUG_ON(!extent_op);
3022
3023                 extent_op->type = PENDING_EXTENT_DELETE;
3024                 extent_op->bytenr = bytenr;
3025                 extent_op->num_bytes = num_bytes;
3026                 extent_op->parent = parent;
3027                 extent_op->orig_parent = parent;
3028                 extent_op->generation = ref_generation;
3029                 extent_op->orig_generation = ref_generation;
3030                 extent_op->level = (int)owner_objectid;
3031                 INIT_LIST_HEAD(&extent_op->list);
3032                 extent_op->del = 0;
3033
3034                 set_extent_bits(&root->fs_info->pending_del,
3035                                 bytenr, bytenr + num_bytes - 1,
3036                                 EXTENT_WRITEBACK, GFP_NOFS);
3037                 set_state_private(&root->fs_info->pending_del,
3038                                   bytenr, (unsigned l