569f9ab65ceb6b36503e4c43035d3194b58db6c5
[sfrench/cifs-2.6.git] / fs / btrfs / qgroup.c
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include <linux/workqueue.h>
26 #include <linux/btrfs.h>
27
28 #include "ctree.h"
29 #include "transaction.h"
30 #include "disk-io.h"
31 #include "locking.h"
32 #include "ulist.h"
33 #include "backref.h"
34 #include "extent_io.h"
35 #include "qgroup.h"
36
37
38 /* TODO XXX FIXME
39  *  - subvol delete -> delete when ref goes to 0? delete limits also?
40  *  - reorganize keys
41  *  - compressed
42  *  - sync
43  *  - copy also limits on subvol creation
44  *  - limit
45  *  - caches for ulists
46  *  - performance benchmarks
47  *  - check all ioctl parameters
48  */
49
50 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
51                                            int mod)
52 {
53         if (qg->old_refcnt < seq)
54                 qg->old_refcnt = seq;
55         qg->old_refcnt += mod;
56 }
57
58 static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
59                                            int mod)
60 {
61         if (qg->new_refcnt < seq)
62                 qg->new_refcnt = seq;
63         qg->new_refcnt += mod;
64 }
65
66 static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
67 {
68         if (qg->old_refcnt < seq)
69                 return 0;
70         return qg->old_refcnt - seq;
71 }
72
73 static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
74 {
75         if (qg->new_refcnt < seq)
76                 return 0;
77         return qg->new_refcnt - seq;
78 }
79
80 /*
81  * glue structure to represent the relations between qgroups.
82  */
83 struct btrfs_qgroup_list {
84         struct list_head next_group;
85         struct list_head next_member;
86         struct btrfs_qgroup *group;
87         struct btrfs_qgroup *member;
88 };
89
90 static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg)
91 {
92         return (u64)(uintptr_t)qg;
93 }
94
95 static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n)
96 {
97         return (struct btrfs_qgroup *)(uintptr_t)n->aux;
98 }
99
100 static int
101 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
102                    int init_flags);
103 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
104
105 /* must be called with qgroup_ioctl_lock held */
106 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
107                                            u64 qgroupid)
108 {
109         struct rb_node *n = fs_info->qgroup_tree.rb_node;
110         struct btrfs_qgroup *qgroup;
111
112         while (n) {
113                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
114                 if (qgroup->qgroupid < qgroupid)
115                         n = n->rb_left;
116                 else if (qgroup->qgroupid > qgroupid)
117                         n = n->rb_right;
118                 else
119                         return qgroup;
120         }
121         return NULL;
122 }
123
124 /* must be called with qgroup_lock held */
125 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
126                                           u64 qgroupid)
127 {
128         struct rb_node **p = &fs_info->qgroup_tree.rb_node;
129         struct rb_node *parent = NULL;
130         struct btrfs_qgroup *qgroup;
131
132         while (*p) {
133                 parent = *p;
134                 qgroup = rb_entry(parent, struct btrfs_qgroup, node);
135
136                 if (qgroup->qgroupid < qgroupid)
137                         p = &(*p)->rb_left;
138                 else if (qgroup->qgroupid > qgroupid)
139                         p = &(*p)->rb_right;
140                 else
141                         return qgroup;
142         }
143
144         qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
145         if (!qgroup)
146                 return ERR_PTR(-ENOMEM);
147
148         qgroup->qgroupid = qgroupid;
149         INIT_LIST_HEAD(&qgroup->groups);
150         INIT_LIST_HEAD(&qgroup->members);
151         INIT_LIST_HEAD(&qgroup->dirty);
152
153         rb_link_node(&qgroup->node, parent, p);
154         rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
155
156         return qgroup;
157 }
158
159 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
160 {
161         struct btrfs_qgroup_list *list;
162
163         list_del(&qgroup->dirty);
164         while (!list_empty(&qgroup->groups)) {
165                 list = list_first_entry(&qgroup->groups,
166                                         struct btrfs_qgroup_list, next_group);
167                 list_del(&list->next_group);
168                 list_del(&list->next_member);
169                 kfree(list);
170         }
171
172         while (!list_empty(&qgroup->members)) {
173                 list = list_first_entry(&qgroup->members,
174                                         struct btrfs_qgroup_list, next_member);
175                 list_del(&list->next_group);
176                 list_del(&list->next_member);
177                 kfree(list);
178         }
179         kfree(qgroup);
180 }
181
182 /* must be called with qgroup_lock held */
183 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
184 {
185         struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
186
187         if (!qgroup)
188                 return -ENOENT;
189
190         rb_erase(&qgroup->node, &fs_info->qgroup_tree);
191         __del_qgroup_rb(qgroup);
192         return 0;
193 }
194
195 /* must be called with qgroup_lock held */
196 static int add_relation_rb(struct btrfs_fs_info *fs_info,
197                            u64 memberid, u64 parentid)
198 {
199         struct btrfs_qgroup *member;
200         struct btrfs_qgroup *parent;
201         struct btrfs_qgroup_list *list;
202
203         member = find_qgroup_rb(fs_info, memberid);
204         parent = find_qgroup_rb(fs_info, parentid);
205         if (!member || !parent)
206                 return -ENOENT;
207
208         list = kzalloc(sizeof(*list), GFP_ATOMIC);
209         if (!list)
210                 return -ENOMEM;
211
212         list->group = parent;
213         list->member = member;
214         list_add_tail(&list->next_group, &member->groups);
215         list_add_tail(&list->next_member, &parent->members);
216
217         return 0;
218 }
219
220 /* must be called with qgroup_lock held */
221 static int del_relation_rb(struct btrfs_fs_info *fs_info,
222                            u64 memberid, u64 parentid)
223 {
224         struct btrfs_qgroup *member;
225         struct btrfs_qgroup *parent;
226         struct btrfs_qgroup_list *list;
227
228         member = find_qgroup_rb(fs_info, memberid);
229         parent = find_qgroup_rb(fs_info, parentid);
230         if (!member || !parent)
231                 return -ENOENT;
232
233         list_for_each_entry(list, &member->groups, next_group) {
234                 if (list->group == parent) {
235                         list_del(&list->next_group);
236                         list_del(&list->next_member);
237                         kfree(list);
238                         return 0;
239                 }
240         }
241         return -ENOENT;
242 }
243
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test helper: check that qgroup @qgroupid exists and that its
 * in-memory rfer and excl counters equal @rfer and @excl.
 *
 * Returns 0 on a match, -EINVAL if the qgroup is missing or either
 * counter differs.
 */
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl)
{
	struct btrfs_qgroup *qg = find_qgroup_rb(fs_info, qgroupid);

	if (!qg || qg->rfer != rfer || qg->excl != excl)
		return -EINVAL;

	return 0;
}
#endif
258
259 /*
260  * The full config is read in one go, only called from open_ctree()
261  * It doesn't use any locking, as at this point we're still single-threaded
262  */
263 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
264 {
265         struct btrfs_key key;
266         struct btrfs_key found_key;
267         struct btrfs_root *quota_root = fs_info->quota_root;
268         struct btrfs_path *path = NULL;
269         struct extent_buffer *l;
270         int slot;
271         int ret = 0;
272         u64 flags = 0;
273         u64 rescan_progress = 0;
274
275         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
276                 return 0;
277
278         fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
279         if (!fs_info->qgroup_ulist) {
280                 ret = -ENOMEM;
281                 goto out;
282         }
283
284         path = btrfs_alloc_path();
285         if (!path) {
286                 ret = -ENOMEM;
287                 goto out;
288         }
289
290         /* default this to quota off, in case no status key is found */
291         fs_info->qgroup_flags = 0;
292
293         /*
294          * pass 1: read status, all qgroup infos and limits
295          */
296         key.objectid = 0;
297         key.type = 0;
298         key.offset = 0;
299         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
300         if (ret)
301                 goto out;
302
303         while (1) {
304                 struct btrfs_qgroup *qgroup;
305
306                 slot = path->slots[0];
307                 l = path->nodes[0];
308                 btrfs_item_key_to_cpu(l, &found_key, slot);
309
310                 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
311                         struct btrfs_qgroup_status_item *ptr;
312
313                         ptr = btrfs_item_ptr(l, slot,
314                                              struct btrfs_qgroup_status_item);
315
316                         if (btrfs_qgroup_status_version(l, ptr) !=
317                             BTRFS_QGROUP_STATUS_VERSION) {
318                                 btrfs_err(fs_info,
319                                  "old qgroup version, quota disabled");
320                                 goto out;
321                         }
322                         if (btrfs_qgroup_status_generation(l, ptr) !=
323                             fs_info->generation) {
324                                 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
325                                 btrfs_err(fs_info,
326                                         "qgroup generation mismatch, marked as inconsistent");
327                         }
328                         fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
329                                                                           ptr);
330                         rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
331                         goto next1;
332                 }
333
334                 if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
335                     found_key.type != BTRFS_QGROUP_LIMIT_KEY)
336                         goto next1;
337
338                 qgroup = find_qgroup_rb(fs_info, found_key.offset);
339                 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
340                     (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
341                         btrfs_err(fs_info, "inconsistent qgroup config");
342                         flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
343                 }
344                 if (!qgroup) {
345                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
346                         if (IS_ERR(qgroup)) {
347                                 ret = PTR_ERR(qgroup);
348                                 goto out;
349                         }
350                 }
351                 switch (found_key.type) {
352                 case BTRFS_QGROUP_INFO_KEY: {
353                         struct btrfs_qgroup_info_item *ptr;
354
355                         ptr = btrfs_item_ptr(l, slot,
356                                              struct btrfs_qgroup_info_item);
357                         qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
358                         qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
359                         qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
360                         qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
361                         /* generation currently unused */
362                         break;
363                 }
364                 case BTRFS_QGROUP_LIMIT_KEY: {
365                         struct btrfs_qgroup_limit_item *ptr;
366
367                         ptr = btrfs_item_ptr(l, slot,
368                                              struct btrfs_qgroup_limit_item);
369                         qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
370                         qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
371                         qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
372                         qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
373                         qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
374                         break;
375                 }
376                 }
377 next1:
378                 ret = btrfs_next_item(quota_root, path);
379                 if (ret < 0)
380                         goto out;
381                 if (ret)
382                         break;
383         }
384         btrfs_release_path(path);
385
386         /*
387          * pass 2: read all qgroup relations
388          */
389         key.objectid = 0;
390         key.type = BTRFS_QGROUP_RELATION_KEY;
391         key.offset = 0;
392         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
393         if (ret)
394                 goto out;
395         while (1) {
396                 slot = path->slots[0];
397                 l = path->nodes[0];
398                 btrfs_item_key_to_cpu(l, &found_key, slot);
399
400                 if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
401                         goto next2;
402
403                 if (found_key.objectid > found_key.offset) {
404                         /* parent <- member, not needed to build config */
405                         /* FIXME should we omit the key completely? */
406                         goto next2;
407                 }
408
409                 ret = add_relation_rb(fs_info, found_key.objectid,
410                                       found_key.offset);
411                 if (ret == -ENOENT) {
412                         btrfs_warn(fs_info,
413                                 "orphan qgroup relation 0x%llx->0x%llx",
414                                 found_key.objectid, found_key.offset);
415                         ret = 0;        /* ignore the error */
416                 }
417                 if (ret)
418                         goto out;
419 next2:
420                 ret = btrfs_next_item(quota_root, path);
421                 if (ret < 0)
422                         goto out;
423                 if (ret)
424                         break;
425         }
426 out:
427         fs_info->qgroup_flags |= flags;
428         if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
429                 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
430         else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
431                  ret >= 0)
432                 ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
433         btrfs_free_path(path);
434
435         if (ret < 0) {
436                 ulist_free(fs_info->qgroup_ulist);
437                 fs_info->qgroup_ulist = NULL;
438                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
439         }
440
441         return ret < 0 ? ret : 0;
442 }
443
444 /*
445  * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
446  * first two are in single-threaded paths.And for the third one, we have set
447  * quota_root to be null with qgroup_lock held before, so it is safe to clean
448  * up the in-memory structures without qgroup_lock held.
449  */
450 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
451 {
452         struct rb_node *n;
453         struct btrfs_qgroup *qgroup;
454
455         while ((n = rb_first(&fs_info->qgroup_tree))) {
456                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
457                 rb_erase(n, &fs_info->qgroup_tree);
458                 __del_qgroup_rb(qgroup);
459         }
460         /*
461          * we call btrfs_free_qgroup_config() when umounting
462          * filesystem and disabling quota, so we set qgroup_ulist
463          * to be null here to avoid double free.
464          */
465         ulist_free(fs_info->qgroup_ulist);
466         fs_info->qgroup_ulist = NULL;
467 }
468
469 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
470                                     struct btrfs_root *quota_root,
471                                     u64 src, u64 dst)
472 {
473         int ret;
474         struct btrfs_path *path;
475         struct btrfs_key key;
476
477         path = btrfs_alloc_path();
478         if (!path)
479                 return -ENOMEM;
480
481         key.objectid = src;
482         key.type = BTRFS_QGROUP_RELATION_KEY;
483         key.offset = dst;
484
485         ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
486
487         btrfs_mark_buffer_dirty(path->nodes[0]);
488
489         btrfs_free_path(path);
490         return ret;
491 }
492
493 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
494                                     struct btrfs_root *quota_root,
495                                     u64 src, u64 dst)
496 {
497         int ret;
498         struct btrfs_path *path;
499         struct btrfs_key key;
500
501         path = btrfs_alloc_path();
502         if (!path)
503                 return -ENOMEM;
504
505         key.objectid = src;
506         key.type = BTRFS_QGROUP_RELATION_KEY;
507         key.offset = dst;
508
509         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
510         if (ret < 0)
511                 goto out;
512
513         if (ret > 0) {
514                 ret = -ENOENT;
515                 goto out;
516         }
517
518         ret = btrfs_del_item(trans, quota_root, path);
519 out:
520         btrfs_free_path(path);
521         return ret;
522 }
523
524 static int add_qgroup_item(struct btrfs_trans_handle *trans,
525                            struct btrfs_root *quota_root, u64 qgroupid)
526 {
527         int ret;
528         struct btrfs_path *path;
529         struct btrfs_qgroup_info_item *qgroup_info;
530         struct btrfs_qgroup_limit_item *qgroup_limit;
531         struct extent_buffer *leaf;
532         struct btrfs_key key;
533
534         if (btrfs_is_testing(quota_root->fs_info))
535                 return 0;
536
537         path = btrfs_alloc_path();
538         if (!path)
539                 return -ENOMEM;
540
541         key.objectid = 0;
542         key.type = BTRFS_QGROUP_INFO_KEY;
543         key.offset = qgroupid;
544
545         /*
546          * Avoid a transaction abort by catching -EEXIST here. In that
547          * case, we proceed by re-initializing the existing structure
548          * on disk.
549          */
550
551         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
552                                       sizeof(*qgroup_info));
553         if (ret && ret != -EEXIST)
554                 goto out;
555
556         leaf = path->nodes[0];
557         qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
558                                  struct btrfs_qgroup_info_item);
559         btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
560         btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
561         btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
562         btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
563         btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
564
565         btrfs_mark_buffer_dirty(leaf);
566
567         btrfs_release_path(path);
568
569         key.type = BTRFS_QGROUP_LIMIT_KEY;
570         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
571                                       sizeof(*qgroup_limit));
572         if (ret && ret != -EEXIST)
573                 goto out;
574
575         leaf = path->nodes[0];
576         qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
577                                   struct btrfs_qgroup_limit_item);
578         btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
579         btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
580         btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
581         btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
582         btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
583
584         btrfs_mark_buffer_dirty(leaf);
585
586         ret = 0;
587 out:
588         btrfs_free_path(path);
589         return ret;
590 }
591
592 static int del_qgroup_item(struct btrfs_trans_handle *trans,
593                            struct btrfs_root *quota_root, u64 qgroupid)
594 {
595         int ret;
596         struct btrfs_path *path;
597         struct btrfs_key key;
598
599         path = btrfs_alloc_path();
600         if (!path)
601                 return -ENOMEM;
602
603         key.objectid = 0;
604         key.type = BTRFS_QGROUP_INFO_KEY;
605         key.offset = qgroupid;
606         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
607         if (ret < 0)
608                 goto out;
609
610         if (ret > 0) {
611                 ret = -ENOENT;
612                 goto out;
613         }
614
615         ret = btrfs_del_item(trans, quota_root, path);
616         if (ret)
617                 goto out;
618
619         btrfs_release_path(path);
620
621         key.type = BTRFS_QGROUP_LIMIT_KEY;
622         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
623         if (ret < 0)
624                 goto out;
625
626         if (ret > 0) {
627                 ret = -ENOENT;
628                 goto out;
629         }
630
631         ret = btrfs_del_item(trans, quota_root, path);
632
633 out:
634         btrfs_free_path(path);
635         return ret;
636 }
637
638 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
639                                     struct btrfs_root *root,
640                                     struct btrfs_qgroup *qgroup)
641 {
642         struct btrfs_path *path;
643         struct btrfs_key key;
644         struct extent_buffer *l;
645         struct btrfs_qgroup_limit_item *qgroup_limit;
646         int ret;
647         int slot;
648
649         key.objectid = 0;
650         key.type = BTRFS_QGROUP_LIMIT_KEY;
651         key.offset = qgroup->qgroupid;
652
653         path = btrfs_alloc_path();
654         if (!path)
655                 return -ENOMEM;
656
657         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
658         if (ret > 0)
659                 ret = -ENOENT;
660
661         if (ret)
662                 goto out;
663
664         l = path->nodes[0];
665         slot = path->slots[0];
666         qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
667         btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
668         btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
669         btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
670         btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
671         btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
672
673         btrfs_mark_buffer_dirty(l);
674
675 out:
676         btrfs_free_path(path);
677         return ret;
678 }
679
680 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
681                                    struct btrfs_root *root,
682                                    struct btrfs_qgroup *qgroup)
683 {
684         struct btrfs_path *path;
685         struct btrfs_key key;
686         struct extent_buffer *l;
687         struct btrfs_qgroup_info_item *qgroup_info;
688         int ret;
689         int slot;
690
691         if (btrfs_is_testing(root->fs_info))
692                 return 0;
693
694         key.objectid = 0;
695         key.type = BTRFS_QGROUP_INFO_KEY;
696         key.offset = qgroup->qgroupid;
697
698         path = btrfs_alloc_path();
699         if (!path)
700                 return -ENOMEM;
701
702         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
703         if (ret > 0)
704                 ret = -ENOENT;
705
706         if (ret)
707                 goto out;
708
709         l = path->nodes[0];
710         slot = path->slots[0];
711         qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
712         btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
713         btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
714         btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
715         btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
716         btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
717
718         btrfs_mark_buffer_dirty(l);
719
720 out:
721         btrfs_free_path(path);
722         return ret;
723 }
724
725 static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
726                                      struct btrfs_fs_info *fs_info,
727                                     struct btrfs_root *root)
728 {
729         struct btrfs_path *path;
730         struct btrfs_key key;
731         struct extent_buffer *l;
732         struct btrfs_qgroup_status_item *ptr;
733         int ret;
734         int slot;
735
736         key.objectid = 0;
737         key.type = BTRFS_QGROUP_STATUS_KEY;
738         key.offset = 0;
739
740         path = btrfs_alloc_path();
741         if (!path)
742                 return -ENOMEM;
743
744         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
745         if (ret > 0)
746                 ret = -ENOENT;
747
748         if (ret)
749                 goto out;
750
751         l = path->nodes[0];
752         slot = path->slots[0];
753         ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
754         btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
755         btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
756         btrfs_set_qgroup_status_rescan(l, ptr,
757                                 fs_info->qgroup_rescan_progress.objectid);
758
759         btrfs_mark_buffer_dirty(l);
760
761 out:
762         btrfs_free_path(path);
763         return ret;
764 }
765
766 /*
767  * called with qgroup_lock held
768  */
769 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
770                                   struct btrfs_root *root)
771 {
772         struct btrfs_path *path;
773         struct btrfs_key key;
774         struct extent_buffer *leaf = NULL;
775         int ret;
776         int nr = 0;
777
778         path = btrfs_alloc_path();
779         if (!path)
780                 return -ENOMEM;
781
782         path->leave_spinning = 1;
783
784         key.objectid = 0;
785         key.offset = 0;
786         key.type = 0;
787
788         while (1) {
789                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
790                 if (ret < 0)
791                         goto out;
792                 leaf = path->nodes[0];
793                 nr = btrfs_header_nritems(leaf);
794                 if (!nr)
795                         break;
796                 /*
797                  * delete the leaf one by one
798                  * since the whole tree is going
799                  * to be deleted.
800                  */
801                 path->slots[0] = 0;
802                 ret = btrfs_del_items(trans, root, path, 0, nr);
803                 if (ret)
804                         goto out;
805
806                 btrfs_release_path(path);
807         }
808         ret = 0;
809 out:
810         btrfs_free_path(path);
811         return ret;
812 }
813
814 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
815                        struct btrfs_fs_info *fs_info)
816 {
817         struct btrfs_root *quota_root;
818         struct btrfs_root *tree_root = fs_info->tree_root;
819         struct btrfs_path *path = NULL;
820         struct btrfs_qgroup_status_item *ptr;
821         struct extent_buffer *leaf;
822         struct btrfs_key key;
823         struct btrfs_key found_key;
824         struct btrfs_qgroup *qgroup = NULL;
825         int ret = 0;
826         int slot;
827
828         mutex_lock(&fs_info->qgroup_ioctl_lock);
829         if (fs_info->quota_root)
830                 goto out;
831
832         fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
833         if (!fs_info->qgroup_ulist) {
834                 ret = -ENOMEM;
835                 goto out;
836         }
837
838         /*
839          * initially create the quota tree
840          */
841         quota_root = btrfs_create_tree(trans, fs_info,
842                                        BTRFS_QUOTA_TREE_OBJECTID);
843         if (IS_ERR(quota_root)) {
844                 ret =  PTR_ERR(quota_root);
845                 goto out;
846         }
847
848         path = btrfs_alloc_path();
849         if (!path) {
850                 ret = -ENOMEM;
851                 goto out_free_root;
852         }
853
854         key.objectid = 0;
855         key.type = BTRFS_QGROUP_STATUS_KEY;
856         key.offset = 0;
857
858         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
859                                       sizeof(*ptr));
860         if (ret)
861                 goto out_free_path;
862
863         leaf = path->nodes[0];
864         ptr = btrfs_item_ptr(leaf, path->slots[0],
865                                  struct btrfs_qgroup_status_item);
866         btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
867         btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
868         fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
869                                 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
870         btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
871         btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
872
873         btrfs_mark_buffer_dirty(leaf);
874
875         key.objectid = 0;
876         key.type = BTRFS_ROOT_REF_KEY;
877         key.offset = 0;
878
879         btrfs_release_path(path);
880         ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
881         if (ret > 0)
882                 goto out_add_root;
883         if (ret < 0)
884                 goto out_free_path;
885
886
887         while (1) {
888                 slot = path->slots[0];
889                 leaf = path->nodes[0];
890                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
891
892                 if (found_key.type == BTRFS_ROOT_REF_KEY) {
893                         ret = add_qgroup_item(trans, quota_root,
894                                               found_key.offset);
895                         if (ret)
896                                 goto out_free_path;
897
898                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
899                         if (IS_ERR(qgroup)) {
900                                 ret = PTR_ERR(qgroup);
901                                 goto out_free_path;
902                         }
903                 }
904                 ret = btrfs_next_item(tree_root, path);
905                 if (ret < 0)
906                         goto out_free_path;
907                 if (ret)
908                         break;
909         }
910
911 out_add_root:
912         btrfs_release_path(path);
913         ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
914         if (ret)
915                 goto out_free_path;
916
917         qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
918         if (IS_ERR(qgroup)) {
919                 ret = PTR_ERR(qgroup);
920                 goto out_free_path;
921         }
922         spin_lock(&fs_info->qgroup_lock);
923         fs_info->quota_root = quota_root;
924         set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
925         spin_unlock(&fs_info->qgroup_lock);
926         ret = qgroup_rescan_init(fs_info, 0, 1);
927         if (!ret) {
928                 qgroup_rescan_zero_tracking(fs_info);
929                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
930                                  &fs_info->qgroup_rescan_work);
931         }
932
933 out_free_path:
934         btrfs_free_path(path);
935 out_free_root:
936         if (ret) {
937                 free_extent_buffer(quota_root->node);
938                 free_extent_buffer(quota_root->commit_root);
939                 kfree(quota_root);
940         }
941 out:
942         if (ret) {
943                 ulist_free(fs_info->qgroup_ulist);
944                 fs_info->qgroup_ulist = NULL;
945         }
946         mutex_unlock(&fs_info->qgroup_ioctl_lock);
947         return ret;
948 }
949
950 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
951                         struct btrfs_fs_info *fs_info)
952 {
953         struct btrfs_root *quota_root;
954         int ret = 0;
955
956         mutex_lock(&fs_info->qgroup_ioctl_lock);
957         if (!fs_info->quota_root)
958                 goto out;
959         clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
960         btrfs_qgroup_wait_for_completion(fs_info, false);
961         spin_lock(&fs_info->qgroup_lock);
962         quota_root = fs_info->quota_root;
963         fs_info->quota_root = NULL;
964         fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
965         spin_unlock(&fs_info->qgroup_lock);
966
967         btrfs_free_qgroup_config(fs_info);
968
969         ret = btrfs_clean_quota_tree(trans, quota_root);
970         if (ret)
971                 goto out;
972
973         ret = btrfs_del_root(trans, fs_info, &quota_root->root_key);
974         if (ret)
975                 goto out;
976
977         list_del(&quota_root->dirty_list);
978
979         btrfs_tree_lock(quota_root->node);
980         clean_tree_block(fs_info, quota_root->node);
981         btrfs_tree_unlock(quota_root->node);
982         btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
983
984         free_extent_buffer(quota_root->node);
985         free_extent_buffer(quota_root->commit_root);
986         kfree(quota_root);
987 out:
988         mutex_unlock(&fs_info->qgroup_ioctl_lock);
989         return ret;
990 }
991
992 static void qgroup_dirty(struct btrfs_fs_info *fs_info,
993                          struct btrfs_qgroup *qgroup)
994 {
995         if (list_empty(&qgroup->dirty))
996                 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
997 }
998
999 static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
1000                                       struct btrfs_qgroup *qgroup,
1001                                       u64 num_bytes)
1002 {
1003 #ifdef CONFIG_BTRFS_DEBUG
1004         WARN_ON(qgroup->reserved < num_bytes);
1005         btrfs_debug(fs_info,
1006                 "qgroup %llu reserved space underflow, have: %llu, to free: %llu",
1007                 qgroup->qgroupid, qgroup->reserved, num_bytes);
1008 #endif
1009         qgroup->reserved = 0;
1010 }
1011 /*
1012  * The easy accounting, if we are adding/removing the only ref for an extent
1013  * then this qgroup and all of the parent qgroups get their reference and
1014  * exclusive counts adjusted.
1015  *
1016  * Caller should hold fs_info->qgroup_lock.
1017  */
1018 static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1019                                     struct ulist *tmp, u64 ref_root,
1020                                     u64 num_bytes, int sign)
1021 {
1022         struct btrfs_qgroup *qgroup;
1023         struct btrfs_qgroup_list *glist;
1024         struct ulist_node *unode;
1025         struct ulist_iterator uiter;
1026         int ret = 0;
1027
1028         qgroup = find_qgroup_rb(fs_info, ref_root);
1029         if (!qgroup)
1030                 goto out;
1031
1032         qgroup->rfer += sign * num_bytes;
1033         qgroup->rfer_cmpr += sign * num_bytes;
1034
1035         WARN_ON(sign < 0 && qgroup->excl < num_bytes);
1036         qgroup->excl += sign * num_bytes;
1037         qgroup->excl_cmpr += sign * num_bytes;
1038         if (sign > 0) {
1039                 trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes);
1040                 if (qgroup->reserved < num_bytes)
1041                         report_reserved_underflow(fs_info, qgroup, num_bytes);
1042                 else
1043                         qgroup->reserved -= num_bytes;
1044         }
1045
1046         qgroup_dirty(fs_info, qgroup);
1047
1048         /* Get all of the parent groups that contain this qgroup */
1049         list_for_each_entry(glist, &qgroup->groups, next_group) {
1050                 ret = ulist_add(tmp, glist->group->qgroupid,
1051                                 qgroup_to_aux(glist->group), GFP_ATOMIC);
1052                 if (ret < 0)
1053                         goto out;
1054         }
1055
1056         /* Iterate all of the parents and adjust their reference counts */
1057         ULIST_ITER_INIT(&uiter);
1058         while ((unode = ulist_next(tmp, &uiter))) {
1059                 qgroup = unode_aux_to_qgroup(unode);
1060                 qgroup->rfer += sign * num_bytes;
1061                 qgroup->rfer_cmpr += sign * num_bytes;
1062                 WARN_ON(sign < 0 && qgroup->excl < num_bytes);
1063                 qgroup->excl += sign * num_bytes;
1064                 if (sign > 0) {
1065                         trace_qgroup_update_reserve(fs_info, qgroup,
1066                                                     -(s64)num_bytes);
1067                         if (qgroup->reserved < num_bytes)
1068                                 report_reserved_underflow(fs_info, qgroup,
1069                                                           num_bytes);
1070                         else
1071                                 qgroup->reserved -= num_bytes;
1072                 }
1073                 qgroup->excl_cmpr += sign * num_bytes;
1074                 qgroup_dirty(fs_info, qgroup);
1075
1076                 /* Add any parents of the parents */
1077                 list_for_each_entry(glist, &qgroup->groups, next_group) {
1078                         ret = ulist_add(tmp, glist->group->qgroupid,
1079                                         qgroup_to_aux(glist->group), GFP_ATOMIC);
1080                         if (ret < 0)
1081                                 goto out;
1082                 }
1083         }
1084         ret = 0;
1085 out:
1086         return ret;
1087 }
1088
1089
1090 /*
1091  * Quick path for updating qgroup with only excl refs.
1092  *
1093  * In that case, just update all parent will be enough.
1094  * Or we needs to do a full rescan.
1095  * Caller should also hold fs_info->qgroup_lock.
1096  *
1097  * Return 0 for quick update, return >0 for need to full rescan
1098  * and mark INCONSISTENT flag.
1099  * Return < 0 for other error.
1100  */
1101 static int quick_update_accounting(struct btrfs_fs_info *fs_info,
1102                                    struct ulist *tmp, u64 src, u64 dst,
1103                                    int sign)
1104 {
1105         struct btrfs_qgroup *qgroup;
1106         int ret = 1;
1107         int err = 0;
1108
1109         qgroup = find_qgroup_rb(fs_info, src);
1110         if (!qgroup)
1111                 goto out;
1112         if (qgroup->excl == qgroup->rfer) {
1113                 ret = 0;
1114                 err = __qgroup_excl_accounting(fs_info, tmp, dst,
1115                                                qgroup->excl, sign);
1116                 if (err < 0) {
1117                         ret = err;
1118                         goto out;
1119                 }
1120         }
1121 out:
1122         if (ret)
1123                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1124         return ret;
1125 }
1126
1127 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1128                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1129 {
1130         struct btrfs_root *quota_root;
1131         struct btrfs_qgroup *parent;
1132         struct btrfs_qgroup *member;
1133         struct btrfs_qgroup_list *list;
1134         struct ulist *tmp;
1135         int ret = 0;
1136
1137         /* Check the level of src and dst first */
1138         if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
1139                 return -EINVAL;
1140
1141         tmp = ulist_alloc(GFP_KERNEL);
1142         if (!tmp)
1143                 return -ENOMEM;
1144
1145         mutex_lock(&fs_info->qgroup_ioctl_lock);
1146         quota_root = fs_info->quota_root;
1147         if (!quota_root) {
1148                 ret = -EINVAL;
1149                 goto out;
1150         }
1151         member = find_qgroup_rb(fs_info, src);
1152         parent = find_qgroup_rb(fs_info, dst);
1153         if (!member || !parent) {
1154                 ret = -EINVAL;
1155                 goto out;
1156         }
1157
1158         /* check if such qgroup relation exist firstly */
1159         list_for_each_entry(list, &member->groups, next_group) {
1160                 if (list->group == parent) {
1161                         ret = -EEXIST;
1162                         goto out;
1163                 }
1164         }
1165
1166         ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1167         if (ret)
1168                 goto out;
1169
1170         ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1171         if (ret) {
1172                 del_qgroup_relation_item(trans, quota_root, src, dst);
1173                 goto out;
1174         }
1175
1176         spin_lock(&fs_info->qgroup_lock);
1177         ret = add_relation_rb(fs_info, src, dst);
1178         if (ret < 0) {
1179                 spin_unlock(&fs_info->qgroup_lock);
1180                 goto out;
1181         }
1182         ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
1183         spin_unlock(&fs_info->qgroup_lock);
1184 out:
1185         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1186         ulist_free(tmp);
1187         return ret;
1188 }
1189
1190 static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
1191                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1192 {
1193         struct btrfs_root *quota_root;
1194         struct btrfs_qgroup *parent;
1195         struct btrfs_qgroup *member;
1196         struct btrfs_qgroup_list *list;
1197         struct ulist *tmp;
1198         int ret = 0;
1199         int err;
1200
1201         tmp = ulist_alloc(GFP_KERNEL);
1202         if (!tmp)
1203                 return -ENOMEM;
1204
1205         quota_root = fs_info->quota_root;
1206         if (!quota_root) {
1207                 ret = -EINVAL;
1208                 goto out;
1209         }
1210
1211         member = find_qgroup_rb(fs_info, src);
1212         parent = find_qgroup_rb(fs_info, dst);
1213         if (!member || !parent) {
1214                 ret = -EINVAL;
1215                 goto out;
1216         }
1217
1218         /* check if such qgroup relation exist firstly */
1219         list_for_each_entry(list, &member->groups, next_group) {
1220                 if (list->group == parent)
1221                         goto exist;
1222         }
1223         ret = -ENOENT;
1224         goto out;
1225 exist:
1226         ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1227         err = del_qgroup_relation_item(trans, quota_root, dst, src);
1228         if (err && !ret)
1229                 ret = err;
1230
1231         spin_lock(&fs_info->qgroup_lock);
1232         del_relation_rb(fs_info, src, dst);
1233         ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
1234         spin_unlock(&fs_info->qgroup_lock);
1235 out:
1236         ulist_free(tmp);
1237         return ret;
1238 }
1239
1240 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1241                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1242 {
1243         int ret = 0;
1244
1245         mutex_lock(&fs_info->qgroup_ioctl_lock);
1246         ret = __del_qgroup_relation(trans, fs_info, src, dst);
1247         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1248
1249         return ret;
1250 }
1251
1252 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1253                         struct btrfs_fs_info *fs_info, u64 qgroupid)
1254 {
1255         struct btrfs_root *quota_root;
1256         struct btrfs_qgroup *qgroup;
1257         int ret = 0;
1258
1259         mutex_lock(&fs_info->qgroup_ioctl_lock);
1260         quota_root = fs_info->quota_root;
1261         if (!quota_root) {
1262                 ret = -EINVAL;
1263                 goto out;
1264         }
1265         qgroup = find_qgroup_rb(fs_info, qgroupid);
1266         if (qgroup) {
1267                 ret = -EEXIST;
1268                 goto out;
1269         }
1270
1271         ret = add_qgroup_item(trans, quota_root, qgroupid);
1272         if (ret)
1273                 goto out;
1274
1275         spin_lock(&fs_info->qgroup_lock);
1276         qgroup = add_qgroup_rb(fs_info, qgroupid);
1277         spin_unlock(&fs_info->qgroup_lock);
1278
1279         if (IS_ERR(qgroup))
1280                 ret = PTR_ERR(qgroup);
1281 out:
1282         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1283         return ret;
1284 }
1285
1286 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1287                         struct btrfs_fs_info *fs_info, u64 qgroupid)
1288 {
1289         struct btrfs_root *quota_root;
1290         struct btrfs_qgroup *qgroup;
1291         struct btrfs_qgroup_list *list;
1292         int ret = 0;
1293
1294         mutex_lock(&fs_info->qgroup_ioctl_lock);
1295         quota_root = fs_info->quota_root;
1296         if (!quota_root) {
1297                 ret = -EINVAL;
1298                 goto out;
1299         }
1300
1301         qgroup = find_qgroup_rb(fs_info, qgroupid);
1302         if (!qgroup) {
1303                 ret = -ENOENT;
1304                 goto out;
1305         } else {
1306                 /* check if there are no children of this qgroup */
1307                 if (!list_empty(&qgroup->members)) {
1308                         ret = -EBUSY;
1309                         goto out;
1310                 }
1311         }
1312         ret = del_qgroup_item(trans, quota_root, qgroupid);
1313         if (ret && ret != -ENOENT)
1314                 goto out;
1315
1316         while (!list_empty(&qgroup->groups)) {
1317                 list = list_first_entry(&qgroup->groups,
1318                                         struct btrfs_qgroup_list, next_group);
1319                 ret = __del_qgroup_relation(trans, fs_info,
1320                                            qgroupid,
1321                                            list->group->qgroupid);
1322                 if (ret)
1323                         goto out;
1324         }
1325
1326         spin_lock(&fs_info->qgroup_lock);
1327         del_qgroup_rb(fs_info, qgroupid);
1328         spin_unlock(&fs_info->qgroup_lock);
1329 out:
1330         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1331         return ret;
1332 }
1333
1334 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1335                        struct btrfs_fs_info *fs_info, u64 qgroupid,
1336                        struct btrfs_qgroup_limit *limit)
1337 {
1338         struct btrfs_root *quota_root;
1339         struct btrfs_qgroup *qgroup;
1340         int ret = 0;
1341         /* Sometimes we would want to clear the limit on this qgroup.
1342          * To meet this requirement, we treat the -1 as a special value
1343          * which tell kernel to clear the limit on this qgroup.
1344          */
1345         const u64 CLEAR_VALUE = -1;
1346
1347         mutex_lock(&fs_info->qgroup_ioctl_lock);
1348         quota_root = fs_info->quota_root;
1349         if (!quota_root) {
1350                 ret = -EINVAL;
1351                 goto out;
1352         }
1353
1354         qgroup = find_qgroup_rb(fs_info, qgroupid);
1355         if (!qgroup) {
1356                 ret = -ENOENT;
1357                 goto out;
1358         }
1359
1360         spin_lock(&fs_info->qgroup_lock);
1361         if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
1362                 if (limit->max_rfer == CLEAR_VALUE) {
1363                         qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
1364                         limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
1365                         qgroup->max_rfer = 0;
1366                 } else {
1367                         qgroup->max_rfer = limit->max_rfer;
1368                 }
1369         }
1370         if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
1371                 if (limit->max_excl == CLEAR_VALUE) {
1372                         qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
1373                         limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
1374                         qgroup->max_excl = 0;
1375                 } else {
1376                         qgroup->max_excl = limit->max_excl;
1377                 }
1378         }
1379         if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
1380                 if (limit->rsv_rfer == CLEAR_VALUE) {
1381                         qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
1382                         limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
1383                         qgroup->rsv_rfer = 0;
1384                 } else {
1385                         qgroup->rsv_rfer = limit->rsv_rfer;
1386                 }
1387         }
1388         if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
1389                 if (limit->rsv_excl == CLEAR_VALUE) {
1390                         qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
1391                         limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
1392                         qgroup->rsv_excl = 0;
1393                 } else {
1394                         qgroup->rsv_excl = limit->rsv_excl;
1395                 }
1396         }
1397         qgroup->lim_flags |= limit->flags;
1398
1399         spin_unlock(&fs_info->qgroup_lock);
1400
1401         ret = update_qgroup_limit_item(trans, quota_root, qgroup);
1402         if (ret) {
1403                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1404                 btrfs_info(fs_info, "unable to update quota limit for %llu",
1405                        qgroupid);
1406         }
1407
1408 out:
1409         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1410         return ret;
1411 }
1412
1413 int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
1414                                 struct btrfs_delayed_ref_root *delayed_refs,
1415                                 struct btrfs_qgroup_extent_record *record)
1416 {
1417         struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
1418         struct rb_node *parent_node = NULL;
1419         struct btrfs_qgroup_extent_record *entry;
1420         u64 bytenr = record->bytenr;
1421
1422         assert_spin_locked(&delayed_refs->lock);
1423         trace_btrfs_qgroup_trace_extent(fs_info, record);
1424
1425         while (*p) {
1426                 parent_node = *p;
1427                 entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
1428                                  node);
1429                 if (bytenr < entry->bytenr)
1430                         p = &(*p)->rb_left;
1431                 else if (bytenr > entry->bytenr)
1432                         p = &(*p)->rb_right;
1433                 else
1434                         return 1;
1435         }
1436
1437         rb_link_node(&record->node, parent_node, p);
1438         rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
1439         return 0;
1440 }
1441
1442 int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
1443                                    struct btrfs_qgroup_extent_record *qrecord)
1444 {
1445         struct ulist *old_root;
1446         u64 bytenr = qrecord->bytenr;
1447         int ret;
1448
1449         ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
1450         if (ret < 0) {
1451                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1452                 btrfs_warn(fs_info,
1453 "error accounting new delayed refs extent (err code: %d), quota inconsistent",
1454                         ret);
1455                 return 0;
1456         }
1457
1458         /*
1459          * Here we don't need to get the lock of
1460          * trans->transaction->delayed_refs, since inserted qrecord won't
1461          * be deleted, only qrecord->node may be modified (new qrecord insert)
1462          *
1463          * So modifying qrecord->old_roots is safe here
1464          */
1465         qrecord->old_roots = old_root;
1466         return 0;
1467 }
1468
1469 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
1470                 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
1471                 gfp_t gfp_flag)
1472 {
1473         struct btrfs_qgroup_extent_record *record;
1474         struct btrfs_delayed_ref_root *delayed_refs;
1475         int ret;
1476
1477         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
1478             || bytenr == 0 || num_bytes == 0)
1479                 return 0;
1480         if (WARN_ON(trans == NULL))
1481                 return -EINVAL;
1482         record = kmalloc(sizeof(*record), gfp_flag);
1483         if (!record)
1484                 return -ENOMEM;
1485
1486         delayed_refs = &trans->transaction->delayed_refs;
1487         record->bytenr = bytenr;
1488         record->num_bytes = num_bytes;
1489         record->old_roots = NULL;
1490
1491         spin_lock(&delayed_refs->lock);
1492         ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
1493         spin_unlock(&delayed_refs->lock);
1494         if (ret > 0) {
1495                 kfree(record);
1496                 return 0;
1497         }
1498         return btrfs_qgroup_trace_extent_post(fs_info, record);
1499 }
1500
1501 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
1502                                   struct btrfs_fs_info *fs_info,
1503                                   struct extent_buffer *eb)
1504 {
1505         int nr = btrfs_header_nritems(eb);
1506         int i, extent_type, ret;
1507         struct btrfs_key key;
1508         struct btrfs_file_extent_item *fi;
1509         u64 bytenr, num_bytes;
1510
1511         /* We can be called directly from walk_up_proc() */
1512         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
1513                 return 0;
1514
1515         for (i = 0; i < nr; i++) {
1516                 btrfs_item_key_to_cpu(eb, &key, i);
1517
1518                 if (key.type != BTRFS_EXTENT_DATA_KEY)
1519                         continue;
1520
1521                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
1522                 /* filter out non qgroup-accountable extents  */
1523                 extent_type = btrfs_file_extent_type(eb, fi);
1524
1525                 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
1526                         continue;
1527
1528                 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1529                 if (!bytenr)
1530                         continue;
1531
1532                 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1533
1534                 ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr,
1535                                                 num_bytes, GFP_NOFS);
1536                 if (ret)
1537                         return ret;
1538         }
1539         cond_resched();
1540         return 0;
1541 }
1542
1543 /*
1544  * Walk up the tree from the bottom, freeing leaves and any interior
1545  * nodes which have had all slots visited. If a node (leaf or
1546  * interior) is freed, the node above it will have it's slot
1547  * incremented. The root node will never be freed.
1548  *
1549  * At the end of this function, we should have a path which has all
1550  * slots incremented to the next position for a search. If we need to
1551  * read a new node it will be NULL and the node above it will have the
1552  * correct slot selected for a later read.
1553  *
1554  * If we increment the root nodes slot counter past the number of
1555  * elements, 1 is returned to signal completion of the search.
1556  */
1557 static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
1558 {
1559         int level = 0;
1560         int nr, slot;
1561         struct extent_buffer *eb;
1562
1563         if (root_level == 0)
1564                 return 1;
1565
1566         while (level <= root_level) {
1567                 eb = path->nodes[level];
1568                 nr = btrfs_header_nritems(eb);
1569                 path->slots[level]++;
1570                 slot = path->slots[level];
1571                 if (slot >= nr || level == 0) {
1572                         /*
1573                          * Don't free the root -  we will detect this
1574                          * condition after our loop and return a
1575                          * positive value for caller to stop walking the tree.
1576                          */
1577                         if (level != root_level) {
1578                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
1579                                 path->locks[level] = 0;
1580
1581                                 free_extent_buffer(eb);
1582                                 path->nodes[level] = NULL;
1583                                 path->slots[level] = 0;
1584                         }
1585                 } else {
1586                         /*
1587                          * We have a valid slot to walk back down
1588                          * from. Stop here so caller can process these
1589                          * new nodes.
1590                          */
1591                         break;
1592                 }
1593
1594                 level++;
1595         }
1596
1597         eb = path->nodes[root_level];
1598         if (path->slots[root_level] >= btrfs_header_nritems(eb))
1599                 return 1;
1600
1601         return 0;
1602 }
1603
1604 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
1605                                struct btrfs_root *root,
1606                                struct extent_buffer *root_eb,
1607                                u64 root_gen, int root_level)
1608 {
1609         struct btrfs_fs_info *fs_info = root->fs_info;
1610         int ret = 0;
1611         int level;
1612         struct extent_buffer *eb = root_eb;
1613         struct btrfs_path *path = NULL;
1614
1615         BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL);
1616         BUG_ON(root_eb == NULL);
1617
1618         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
1619                 return 0;
1620
1621         if (!extent_buffer_uptodate(root_eb)) {
1622                 ret = btrfs_read_buffer(root_eb, root_gen);
1623                 if (ret)
1624                         goto out;
1625         }
1626
1627         if (root_level == 0) {
1628                 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb);
1629                 goto out;
1630         }
1631
1632         path = btrfs_alloc_path();
1633         if (!path)
1634                 return -ENOMEM;
1635
1636         /*
1637          * Walk down the tree.  Missing extent blocks are filled in as
1638          * we go. Metadata is accounted every time we read a new
1639          * extent block.
1640          *
1641          * When we reach a leaf, we account for file extent items in it,
1642          * walk back up the tree (adjusting slot pointers as we go)
1643          * and restart the search process.
1644          */
1645         extent_buffer_get(root_eb); /* For path */
1646         path->nodes[root_level] = root_eb;
1647         path->slots[root_level] = 0;
1648         path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
1649 walk_down:
1650         level = root_level;
1651         while (level >= 0) {
1652                 if (path->nodes[level] == NULL) {
1653                         int parent_slot;
1654                         u64 child_gen;
1655                         u64 child_bytenr;
1656
1657                         /*
1658                          * We need to get child blockptr/gen from parent before
1659                          * we can read it.
1660                           */
1661                         eb = path->nodes[level + 1];
1662                         parent_slot = path->slots[level + 1];
1663                         child_bytenr = btrfs_node_blockptr(eb, parent_slot);
1664                         child_gen = btrfs_node_ptr_generation(eb, parent_slot);
1665
1666                         eb = read_tree_block(fs_info, child_bytenr, child_gen);
1667                         if (IS_ERR(eb)) {
1668                                 ret = PTR_ERR(eb);
1669                                 goto out;
1670                         } else if (!extent_buffer_uptodate(eb)) {
1671                                 free_extent_buffer(eb);
1672                                 ret = -EIO;
1673                                 goto out;
1674                         }
1675
1676                         path->nodes[level] = eb;
1677                         path->slots[level] = 0;
1678
1679                         btrfs_tree_read_lock(eb);
1680                         btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1681                         path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
1682
1683                         ret = btrfs_qgroup_trace_extent(trans, fs_info,
1684                                                         child_bytenr,
1685                                                         fs_info->nodesize,
1686                                                         GFP_NOFS);
1687                         if (ret)
1688                                 goto out;
1689                 }
1690
1691                 if (level == 0) {
1692                         ret = btrfs_qgroup_trace_leaf_items(trans,fs_info,
1693                                                            path->nodes[level]);
1694                         if (ret)
1695                                 goto out;
1696
1697                         /* Nonzero return here means we completed our search */
1698                         ret = adjust_slots_upwards(path, root_level);
1699                         if (ret)
1700                                 break;
1701
1702                         /* Restart search with new slots */
1703                         goto walk_down;
1704                 }
1705
1706                 level--;
1707         }
1708
1709         ret = 0;
1710 out:
1711         btrfs_free_path(path);
1712
1713         return ret;
1714 }
1715
1716 #define UPDATE_NEW      0
1717 #define UPDATE_OLD      1
1718 /*
1719  * Walk all of the roots that points to the bytenr and adjust their refcnts.
1720  */
1721 static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
1722                                 struct ulist *roots, struct ulist *tmp,
1723                                 struct ulist *qgroups, u64 seq, int update_old)
1724 {
1725         struct ulist_node *unode;
1726         struct ulist_iterator uiter;
1727         struct ulist_node *tmp_unode;
1728         struct ulist_iterator tmp_uiter;
1729         struct btrfs_qgroup *qg;
1730         int ret = 0;
1731
1732         if (!roots)
1733                 return 0;
1734         ULIST_ITER_INIT(&uiter);
1735         while ((unode = ulist_next(roots, &uiter))) {
1736                 qg = find_qgroup_rb(fs_info, unode->val);
1737                 if (!qg)
1738                         continue;
1739
1740                 ulist_reinit(tmp);
1741                 ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
1742                                 GFP_ATOMIC);
1743                 if (ret < 0)
1744                         return ret;
1745                 ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
1746                 if (ret < 0)
1747                         return ret;
1748                 ULIST_ITER_INIT(&tmp_uiter);
1749                 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1750                         struct btrfs_qgroup_list *glist;
1751
1752                         qg = unode_aux_to_qgroup(tmp_unode);
1753                         if (update_old)
1754                                 btrfs_qgroup_update_old_refcnt(qg, seq, 1);
1755                         else
1756                                 btrfs_qgroup_update_new_refcnt(qg, seq, 1);
1757                         list_for_each_entry(glist, &qg->groups, next_group) {
1758                                 ret = ulist_add(qgroups, glist->group->qgroupid,
1759                                                 qgroup_to_aux(glist->group),
1760                                                 GFP_ATOMIC);
1761                                 if (ret < 0)
1762                                         return ret;
1763                                 ret = ulist_add(tmp, glist->group->qgroupid,
1764                                                 qgroup_to_aux(glist->group),
1765                                                 GFP_ATOMIC);
1766                                 if (ret < 0)
1767                                         return ret;
1768                         }
1769                 }
1770         }
1771         return 0;
1772 }
1773
1774 /*
1775  * Update qgroup rfer/excl counters.
1776  * Rfer update is easy, codes can explain themselves.
1777  *
1778  * Excl update is tricky, the update is split into 2 part.
1779  * Part 1: Possible exclusive <-> sharing detect:
1780  *      |       A       |       !A      |
1781  *  -------------------------------------
1782  *  B   |       *       |       -       |
1783  *  -------------------------------------
1784  *  !B  |       +       |       **      |
1785  *  -------------------------------------
1786  *
1787  * Conditions:
1788  * A:   cur_old_roots < nr_old_roots    (not exclusive before)
1789  * !A:  cur_old_roots == nr_old_roots   (possible exclusive before)
1790  * B:   cur_new_roots < nr_new_roots    (not exclusive now)
1791  * !B:  cur_new_roots == nr_new_roots   (possible exclusive now)
1792  *
1793  * Results:
1794  * +: Possible sharing -> exclusive     -: Possible exclusive -> sharing
1795  * *: Definitely not changed.           **: Possible unchanged.
1796  *
1797  * For !A and !B condition, the exception is cur_old/new_roots == 0 case.
1798  *
1799  * To make the logic clear, we first use condition A and B to split
1800  * combination into 4 results.
1801  *
1802  * Then, for result "+" and "-", check old/new_roots == 0 case, as in them
1803  * only on variant maybe 0.
1804  *
1805  * Lastly, check result **, since there are 2 variants maybe 0, split them
1806  * again(2x2).
1807  * But this time we don't need to consider other things, the codes and logic
1808  * is easy to understand now.
1809  */
1810 static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
1811                                   struct ulist *qgroups,
1812                                   u64 nr_old_roots,
1813                                   u64 nr_new_roots,
1814                                   u64 num_bytes, u64 seq)
1815 {
1816         struct ulist_node *unode;
1817         struct ulist_iterator uiter;
1818         struct btrfs_qgroup *qg;
1819         u64 cur_new_count, cur_old_count;
1820
1821         ULIST_ITER_INIT(&uiter);
1822         while ((unode = ulist_next(qgroups, &uiter))) {
1823                 bool dirty = false;
1824
1825                 qg = unode_aux_to_qgroup(unode);
1826                 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
1827                 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
1828
1829                 trace_qgroup_update_counters(fs_info, qg->qgroupid,
1830                                              cur_old_count, cur_new_count);
1831
1832                 /* Rfer update part */
1833                 if (cur_old_count == 0 && cur_new_count > 0) {
1834                         qg->rfer += num_bytes;
1835                         qg->rfer_cmpr += num_bytes;
1836                         dirty = true;
1837                 }
1838                 if (cur_old_count > 0 && cur_new_count == 0) {
1839                         qg->rfer -= num_bytes;
1840                         qg->rfer_cmpr -= num_bytes;
1841                         dirty = true;
1842                 }
1843
1844                 /* Excl update part */
1845                 /* Exclusive/none -> shared case */
1846                 if (cur_old_count == nr_old_roots &&
1847                     cur_new_count < nr_new_roots) {
1848                         /* Exclusive -> shared */
1849                         if (cur_old_count != 0) {
1850                                 qg->excl -= num_bytes;
1851                                 qg->excl_cmpr -= num_bytes;
1852                                 dirty = true;
1853                         }
1854                 }
1855
1856                 /* Shared -> exclusive/none case */
1857                 if (cur_old_count < nr_old_roots &&
1858                     cur_new_count == nr_new_roots) {
1859                         /* Shared->exclusive */
1860                         if (cur_new_count != 0) {
1861                                 qg->excl += num_bytes;
1862                                 qg->excl_cmpr += num_bytes;
1863                                 dirty = true;
1864                         }
1865                 }
1866
1867                 /* Exclusive/none -> exclusive/none case */
1868                 if (cur_old_count == nr_old_roots &&
1869                     cur_new_count == nr_new_roots) {
1870                         if (cur_old_count == 0) {
1871                                 /* None -> exclusive/none */
1872
1873                                 if (cur_new_count != 0) {
1874                                         /* None -> exclusive */
1875                                         qg->excl += num_bytes;
1876                                         qg->excl_cmpr += num_bytes;
1877                                         dirty = true;
1878                                 }
1879                                 /* None -> none, nothing changed */
1880                         } else {
1881                                 /* Exclusive -> exclusive/none */
1882
1883                                 if (cur_new_count == 0) {
1884                                         /* Exclusive -> none */
1885                                         qg->excl -= num_bytes;
1886                                         qg->excl_cmpr -= num_bytes;
1887                                         dirty = true;
1888                                 }
1889                                 /* Exclusive -> exclusive, nothing changed */
1890                         }
1891                 }
1892
1893                 if (dirty)
1894                         qgroup_dirty(fs_info, qg);
1895         }
1896         return 0;
1897 }
1898
1899 /*
1900  * Check if the @roots potentially is a list of fs tree roots
1901  *
1902  * Return 0 for definitely not a fs/subvol tree roots ulist
1903  * Return 1 for possible fs/subvol tree roots in the list (considering an empty
1904  *          one as well)
1905  */
1906 static int maybe_fs_roots(struct ulist *roots)
1907 {
1908         struct ulist_node *unode;
1909         struct ulist_iterator uiter;
1910
1911         /* Empty one, still possible for fs roots */
1912         if (!roots || roots->nnodes == 0)
1913                 return 1;
1914
1915         ULIST_ITER_INIT(&uiter);
1916         unode = ulist_next(roots, &uiter);
1917         if (!unode)
1918                 return 1;
1919
1920         /*
1921          * If it contains fs tree roots, then it must belong to fs/subvol
1922          * trees.
1923          * If it contains a non-fs tree, it won't be shared with fs/subvol trees.
1924          */
1925         return is_fstree(unode->val);
1926 }
1927
1928 int
1929 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
1930                             struct btrfs_fs_info *fs_info,
1931                             u64 bytenr, u64 num_bytes,
1932                             struct ulist *old_roots, struct ulist *new_roots)
1933 {
1934         struct ulist *qgroups = NULL;
1935         struct ulist *tmp = NULL;
1936         u64 seq;
1937         u64 nr_new_roots = 0;
1938         u64 nr_old_roots = 0;
1939         int ret = 0;
1940
1941         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
1942                 return 0;
1943
1944         if (new_roots) {
1945                 if (!maybe_fs_roots(new_roots))
1946                         goto out_free;
1947                 nr_new_roots = new_roots->nnodes;
1948         }
1949         if (old_roots) {
1950                 if (!maybe_fs_roots(old_roots))
1951                         goto out_free;
1952                 nr_old_roots = old_roots->nnodes;
1953         }
1954
1955         /* Quick exit, either not fs tree roots, or won't affect any qgroup */
1956         if (nr_old_roots == 0 && nr_new_roots == 0)
1957                 goto out_free;
1958
1959         BUG_ON(!fs_info->quota_root);
1960
1961         trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes,
1962                                           nr_old_roots, nr_new_roots);
1963
1964         qgroups = ulist_alloc(GFP_NOFS);
1965         if (!qgroups) {
1966                 ret = -ENOMEM;
1967                 goto out_free;
1968         }
1969         tmp = ulist_alloc(GFP_NOFS);
1970         if (!tmp) {
1971                 ret = -ENOMEM;
1972                 goto out_free;
1973         }
1974
1975         mutex_lock(&fs_info->qgroup_rescan_lock);
1976         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1977                 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
1978                         mutex_unlock(&fs_info->qgroup_rescan_lock);
1979                         ret = 0;
1980                         goto out_free;
1981                 }
1982         }
1983         mutex_unlock(&fs_info->qgroup_rescan_lock);
1984
1985         spin_lock(&fs_info->qgroup_lock);
1986         seq = fs_info->qgroup_seq;
1987
1988         /* Update old refcnts using old_roots */
1989         ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
1990                                    UPDATE_OLD);
1991         if (ret < 0)
1992                 goto out;
1993
1994         /* Update new refcnts using new_roots */
1995         ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
1996                                    UPDATE_NEW);
1997         if (ret < 0)
1998                 goto out;
1999
2000         qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
2001                                num_bytes, seq);
2002
2003         /*
2004          * Bump qgroup_seq to avoid seq overlap
2005          */
2006         fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
2007 out:
2008         spin_unlock(&fs_info->qgroup_lock);
2009 out_free:
2010         ulist_free(tmp);
2011         ulist_free(qgroups);
2012         ulist_free(old_roots);
2013         ulist_free(new_roots);
2014         return ret;
2015 }
2016
2017 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
2018 {
2019         struct btrfs_fs_info *fs_info = trans->fs_info;
2020         struct btrfs_qgroup_extent_record *record;
2021         struct btrfs_delayed_ref_root *delayed_refs;
2022         struct ulist *new_roots = NULL;
2023         struct rb_node *node;
2024         u64 qgroup_to_skip;
2025         int ret = 0;
2026
2027         delayed_refs = &trans->transaction->delayed_refs;
2028         qgroup_to_skip = delayed_refs->qgroup_to_skip;
2029         while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
2030                 record = rb_entry(node, struct btrfs_qgroup_extent_record,
2031                                   node);
2032
2033                 trace_btrfs_qgroup_account_extents(fs_info, record);
2034
2035                 if (!ret) {
2036                         /*
2037                          * Old roots should be searched when inserting qgroup
2038                          * extent record
2039                          */
2040                         if (WARN_ON(!record->old_roots)) {
2041                                 /* Search commit root to find old_roots */
2042                                 ret = btrfs_find_all_roots(NULL, fs_info,
2043                                                 record->bytenr, 0,
2044                                                 &record->old_roots, false);
2045                                 if (ret < 0)
2046                                         goto cleanup;
2047                         }
2048
2049                         /*
2050                          * Use SEQ_LAST as time_seq to do special search, which
2051                          * doesn't lock tree or delayed_refs and search current
2052                          * root. It's safe inside commit_transaction().
2053                          */
2054                         ret = btrfs_find_all_roots(trans, fs_info,
2055                                 record->bytenr, SEQ_LAST, &new_roots, false);
2056                         if (ret < 0)
2057                                 goto cleanup;
2058                         if (qgroup_to_skip) {
2059                                 ulist_del(new_roots, qgroup_to_skip, 0);
2060                                 ulist_del(record->old_roots, qgroup_to_skip,
2061                                           0);
2062                         }
2063                         ret = btrfs_qgroup_account_extent(trans, fs_info,
2064                                         record->bytenr, record->num_bytes,
2065                                         record->old_roots, new_roots);
2066                         record->old_roots = NULL;
2067                         new_roots = NULL;
2068                 }
2069 cleanup:
2070                 ulist_free(record->old_roots);
2071                 ulist_free(new_roots);
2072                 new_roots = NULL;
2073                 rb_erase(node, &delayed_refs->dirty_extent_root);
2074                 kfree(record);
2075
2076         }
2077         return ret;
2078 }
2079
2080 /*
2081  * called from commit_transaction. Writes all changed qgroups to disk.
2082  */
2083 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2084                       struct btrfs_fs_info *fs_info)
2085 {
2086         struct btrfs_root *quota_root = fs_info->quota_root;
2087         int ret = 0;
2088
2089         if (!quota_root)
2090                 return ret;
2091
2092         spin_lock(&fs_info->qgroup_lock);
2093         while (!list_empty(&fs_info->dirty_qgroups)) {
2094                 struct btrfs_qgroup *qgroup;
2095                 qgroup = list_first_entry(&fs_info->dirty_qgroups,
2096                                           struct btrfs_qgroup, dirty);
2097                 list_del_init(&qgroup->dirty);
2098                 spin_unlock(&fs_info->qgroup_lock);
2099                 ret = update_qgroup_info_item(trans, quota_root, qgroup);
2100                 if (ret)
2101                         fs_info->qgroup_flags |=
2102                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2103                 ret = update_qgroup_limit_item(trans, quota_root, qgroup);
2104                 if (ret)
2105                         fs_info->qgroup_flags |=
2106                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2107                 spin_lock(&fs_info->qgroup_lock);
2108         }
2109         if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
2110                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
2111         else
2112                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
2113         spin_unlock(&fs_info->qgroup_lock);
2114
2115         ret = update_qgroup_status_item(trans, fs_info, quota_root);
2116         if (ret)
2117                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2118
2119         return ret;
2120 }
2121
2122 /*
2123  * Copy the accounting information between qgroups. This is necessary
2124  * when a snapshot or a subvolume is created. Throwing an error will
2125  * cause a transaction abort so we take extra care here to only error
2126  * when a readonly fs is a reasonable outcome.
2127  */
2128 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2129                          struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
2130                          struct btrfs_qgroup_inherit *inherit)
2131 {
2132         int ret = 0;
2133         int i;
2134         u64 *i_qgroups;
2135         struct btrfs_root *quota_root = fs_info->quota_root;
2136         struct btrfs_qgroup *srcgroup;
2137         struct btrfs_qgroup *dstgroup;
2138         u32 level_size = 0;
2139         u64 nums;
2140
2141         mutex_lock(&fs_info->qgroup_ioctl_lock);
2142         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
2143                 goto out;
2144
2145         if (!quota_root) {
2146                 ret = -EINVAL;
2147                 goto out;
2148         }
2149
2150         if (inherit) {
2151                 i_qgroups = (u64 *)(inherit + 1);
2152                 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2153                        2 * inherit->num_excl_copies;
2154                 for (i = 0; i < nums; ++i) {
2155                         srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
2156
2157                         /*
2158                          * Zero out invalid groups so we can ignore
2159                          * them later.
2160                          */
2161                         if (!srcgroup ||
2162                             ((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
2163                                 *i_qgroups = 0ULL;
2164
2165                         ++i_qgroups;
2166                 }
2167         }
2168
2169         /*
2170          * create a tracking group for the subvol itself
2171          */
2172         ret = add_qgroup_item(trans, quota_root, objectid);
2173         if (ret)
2174                 goto out;
2175
2176         if (srcid) {
2177                 struct btrfs_root *srcroot;
2178                 struct btrfs_key srckey;
2179
2180                 srckey.objectid = srcid;
2181                 srckey.type = BTRFS_ROOT_ITEM_KEY;
2182                 srckey.offset = (u64)-1;
2183                 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
2184                 if (IS_ERR(srcroot)) {
2185                         ret = PTR_ERR(srcroot);
2186                         goto out;
2187                 }
2188
2189                 level_size = fs_info->nodesize;
2190         }
2191
2192         /*
2193          * add qgroup to all inherited groups
2194          */
2195         if (inherit) {
2196                 i_qgroups = (u64 *)(inherit + 1);
2197                 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
2198                         if (*i_qgroups == 0)
2199                                 continue;
2200                         ret = add_qgroup_relation_item(trans, quota_root,
2201                                                        objectid, *i_qgroups);
2202                         if (ret && ret != -EEXIST)
2203                                 goto out;
2204                         ret = add_qgroup_relation_item(trans, quota_root,
2205                                                        *i_qgroups, objectid);
2206                         if (ret && ret != -EEXIST)
2207                                 goto out;
2208                 }
2209                 ret = 0;
2210         }
2211
2212
2213         spin_lock(&fs_info->qgroup_lock);
2214
2215         dstgroup = add_qgroup_rb(fs_info, objectid);
2216         if (IS_ERR(dstgroup)) {
2217                 ret = PTR_ERR(dstgroup);
2218                 goto unlock;
2219         }
2220
2221         if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2222                 dstgroup->lim_flags = inherit->lim.flags;
2223                 dstgroup->max_rfer = inherit->lim.max_rfer;
2224                 dstgroup->max_excl = inherit->lim.max_excl;
2225                 dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
2226                 dstgroup->rsv_excl = inherit->lim.rsv_excl;
2227
2228                 ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
2229                 if (ret) {
2230                         fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2231                         btrfs_info(fs_info,
2232                                    "unable to update quota limit for %llu",
2233                                    dstgroup->qgroupid);
2234                         goto unlock;
2235                 }
2236         }
2237
2238         if (srcid) {
2239                 srcgroup = find_qgroup_rb(fs_info, srcid);
2240                 if (!srcgroup)
2241                         goto unlock;
2242
2243                 /*
2244                  * We call inherit after we clone the root in order to make sure
2245                  * our counts don't go crazy, so at this point the only
2246                  * difference between the two roots should be the root node.
2247                  */
2248                 dstgroup->rfer = srcgroup->rfer;
2249                 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2250                 dstgroup->excl = level_size;
2251                 dstgroup->excl_cmpr = level_size;
2252                 srcgroup->excl = level_size;
2253                 srcgroup->excl_cmpr = level_size;
2254
2255                 /* inherit the limit info */
2256                 dstgroup->lim_flags = srcgroup->lim_flags;
2257                 dstgroup->max_rfer = srcgroup->max_rfer;
2258                 dstgroup->max_excl = srcgroup->max_excl;
2259                 dstgroup->rsv_rfer = srcgroup->rsv_rfer;
2260                 dstgroup->rsv_excl = srcgroup->rsv_excl;
2261
2262                 qgroup_dirty(fs_info, dstgroup);
2263                 qgroup_dirty(fs_info, srcgroup);
2264         }
2265
2266         if (!inherit)
2267                 goto unlock;
2268
2269         i_qgroups = (u64 *)(inherit + 1);
2270         for (i = 0; i < inherit->num_qgroups; ++i) {
2271                 if (*i_qgroups) {
2272                         ret = add_relation_rb(fs_info, objectid, *i_qgroups);
2273                         if (ret)
2274                                 goto unlock;
2275                 }
2276                 ++i_qgroups;
2277         }
2278
2279         for (i = 0; i <  inherit->num_ref_copies; ++i, i_qgroups += 2) {
2280                 struct btrfs_qgroup *src;
2281                 struct btrfs_qgroup *dst;
2282
2283                 if (!i_qgroups[0] || !i_qgroups[1])
2284                         continue;
2285
2286                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2287                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2288
2289                 if (!src || !dst) {
2290                         ret = -EINVAL;
2291                         goto unlock;
2292                 }
2293
2294                 dst->rfer = src->rfer - level_size;
2295                 dst->rfer_cmpr = src->rfer_cmpr - level_size;
2296         }
2297         for (i = 0; i <  inherit->num_excl_copies; ++i, i_qgroups += 2) {
2298                 struct btrfs_qgroup *src;
2299                 struct btrfs_qgroup *dst;
2300
2301                 if (!i_qgroups[0] || !i_qgroups[1])
2302                         continue;
2303
2304                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2305                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2306
2307                 if (!src || !dst) {
2308                         ret = -EINVAL;
2309                         goto unlock;
2310                 }
2311
2312                 dst->excl = src->excl + level_size;
2313                 dst->excl_cmpr = src->excl_cmpr + level_size;
2314         }
2315
2316 unlock:
2317         spin_unlock(&fs_info->qgroup_lock);
2318 out:
2319         mutex_unlock(&fs_info->qgroup_ioctl_lock);
2320         return ret;
2321 }
2322
2323 static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
2324 {
2325         if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
2326             qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
2327                 return false;
2328
2329         if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
2330             qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
2331                 return false;
2332
2333         return true;
2334 }
2335
2336 static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
2337 {
2338         struct btrfs_root *quota_root;
2339         struct btrfs_qgroup *qgroup;
2340         struct btrfs_fs_info *fs_info = root->fs_info;
2341         u64 ref_root = root->root_key.objectid;
2342         int ret = 0;
2343         int retried = 0;
2344         struct ulist_node *unode;
2345         struct ulist_iterator uiter;
2346
2347         if (!is_fstree(ref_root))
2348                 return 0;
2349
2350         if (num_bytes == 0)
2351                 return 0;
2352
2353         if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
2354             capable(CAP_SYS_RESOURCE))
2355                 enforce = false;
2356
2357 retry:
2358         spin_lock(&fs_info->qgroup_lock);
2359         quota_root = fs_info->quota_root;
2360         if (!quota_root)
2361                 goto out;
2362
2363         qgroup = find_qgroup_rb(fs_info, ref_root);
2364         if (!qgroup)
2365                 goto out;
2366
2367         /*
2368          * in a first step, we check all affected qgroups if any limits would
2369          * be exceeded
2370          */
2371         ulist_reinit(fs_info->qgroup_ulist);
2372         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2373                         (uintptr_t)qgroup, GFP_ATOMIC);
2374         if (ret < 0)
2375                 goto out;
2376         ULIST_ITER_INIT(&uiter);
2377         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2378                 struct btrfs_qgroup *qg;
2379                 struct btrfs_qgroup_list *glist;
2380
2381                 qg = unode_aux_to_qgroup(unode);
2382
2383                 if (enforce && !qgroup_check_limits(qg, num_bytes)) {
2384                         /*
2385                          * Commit the tree and retry, since we may have
2386                          * deletions which would free up space.
2387                          */
2388                         if (!retried && qg->reserved > 0) {
2389                                 struct btrfs_trans_handle *trans;
2390
2391                                 spin_unlock(&fs_info->qgroup_lock);
2392                                 ret = btrfs_start_delalloc_inodes(root, 0);
2393                                 if (ret)
2394                                         return ret;
2395                                 btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
2396                                 trans = btrfs_join_transaction(root);
2397                                 if (IS_ERR(trans))
2398                                         return PTR_ERR(trans);
2399                                 ret = btrfs_commit_transaction(trans);
2400                                 if (ret)
2401                                         return ret;
2402                                 retried++;
2403                                 goto retry;
2404                         }
2405                         ret = -EDQUOT;
2406                         goto out;
2407                 }
2408
2409                 list_for_each_entry(glist, &qg->groups, next_group) {
2410                         ret = ulist_add(fs_info->qgroup_ulist,
2411                                         glist->group->qgroupid,
2412                                         (uintptr_t)glist->group, GFP_ATOMIC);
2413                         if (ret < 0)
2414                                 goto out;
2415                 }
2416         }
2417         ret = 0;
2418         /*
2419          * no limits exceeded, now record the reservation into all qgroups
2420          */
2421         ULIST_ITER_INIT(&uiter);
2422         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2423                 struct btrfs_qgroup *qg;
2424
2425                 qg = unode_aux_to_qgroup(unode);
2426
2427                 trace_qgroup_update_reserve(fs_info, qg, num_bytes);
2428                 qg->reserved += num_bytes;
2429         }
2430
2431 out:
2432         spin_unlock(&fs_info->qgroup_lock);
2433         return ret;
2434 }
2435
2436 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
2437                                u64 ref_root, u64 num_bytes)
2438 {
2439         struct btrfs_root *quota_root;
2440         struct btrfs_qgroup *qgroup;
2441         struct ulist_node *unode;
2442         struct ulist_iterator uiter;
2443         int ret = 0;
2444
2445         if (!is_fstree(ref_root))
2446                 return;
2447
2448         if (num_bytes == 0)
2449                 return;
2450
2451         spin_lock(&fs_info->qgroup_lock);
2452
2453         quota_root = fs_info->quota_root;
2454         if (!quota_root)
2455                 goto out;
2456
2457         qgroup = find_qgroup_rb(fs_info, ref_root);
2458         if (!qgroup)
2459                 goto out;
2460
2461         ulist_reinit(fs_info->qgroup_ulist);
2462         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2463                         (uintptr_t)qgroup, GFP_ATOMIC);
2464         if (ret < 0)
2465                 goto out;
2466         ULIST_ITER_INIT(&uiter);
2467         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2468                 struct btrfs_qgroup *qg;
2469                 struct btrfs_qgroup_list *glist;
2470
2471                 qg = unode_aux_to_qgroup(unode);
2472
2473                 trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
2474                 if (qg->reserved < num_bytes)
2475                         report_reserved_underflow(fs_info, qg, num_bytes);
2476                 else
2477                         qg->reserved -= num_bytes;
2478
2479                 list_for_each_entry(glist, &qg->groups, next_group) {
2480                         ret = ulist_add(fs_info->qgroup_ulist,
2481                                         glist->group->qgroupid,
2482                                         (uintptr_t)glist->group, GFP_ATOMIC);
2483                         if (ret < 0)
2484                                 goto out;
2485                 }
2486         }
2487
2488 out:
2489         spin_unlock(&fs_info->qgroup_lock);
2490 }
2491
2492 /*
2493  * returns < 0 on error, 0 when more leafs are to be scanned.
2494  * returns 1 when done.
2495  */
2496 static int
2497 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2498                    struct btrfs_trans_handle *trans)
2499 {
2500         struct btrfs_key found;
2501         struct extent_buffer *scratch_leaf = NULL;
2502         struct ulist *roots = NULL;
2503         struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
2504         u64 num_bytes;
2505         int slot;
2506         int ret;
2507
2508         mutex_lock(&fs_info->qgroup_rescan_lock);
2509         ret = btrfs_search_slot_for_read(fs_info->extent_root,
2510                                          &fs_info->qgroup_rescan_progress,
2511                                          path, 1, 0);
2512
2513         btrfs_debug(fs_info,
2514                 "current progress key (%llu %u %llu), search_slot ret %d",
2515                 fs_info->qgroup_rescan_progress.objectid,
2516                 fs_info->qgroup_rescan_progress.type,
2517                 fs_info->qgroup_rescan_progress.offset, ret);
2518
2519         if (ret) {
2520                 /*
2521                  * The rescan is about to end, we will not be scanning any
2522                  * further blocks. We cannot unset the RESCAN flag here, because
2523                  * we want to commit the transaction if everything went well.
2524                  * To make the live accounting work in this phase, we set our
2525                  * scan progress pointer such that every real extent objectid
2526                  * will be smaller.
2527                  */
2528                 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
2529                 btrfs_release_path(path);
2530                 mutex_unlock(&fs_info->qgroup_rescan_lock);
2531                 return ret;
2532         }
2533
2534         btrfs_item_key_to_cpu(path->nodes[0], &found,
2535                               btrfs_header_nritems(path->nodes[0]) - 1);
2536         fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
2537
2538         btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2539         scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
2540         if (!scratch_leaf) {
2541                 ret = -ENOMEM;
2542                 mutex_unlock(&fs_info->qgroup_rescan_lock);
2543                 goto out;
2544         }
2545         extent_buffer_get(scratch_leaf);
2546         btrfs_tree_read_lock(scratch_leaf);
2547         btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
2548         slot = path->slots[0];
2549         btrfs_release_path(path);
2550         mutex_unlock(&fs_info->qgroup_rescan_lock);
2551
2552         for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
2553                 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
2554                 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
2555                     found.type != BTRFS_METADATA_ITEM_KEY)
2556                         continue;
2557                 if (found.type == BTRFS_METADATA_ITEM_KEY)
2558                         num_bytes = fs_info->nodesize;
2559                 else
2560                         num_bytes = found.offset;
2561
2562                 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
2563                                            &roots, false);
2564                 if (ret < 0)
2565                         goto out;
2566                 /* For rescan, just pass old_roots as NULL */
2567                 ret = btrfs_qgroup_account_extent(trans, fs_info,
2568                                 found.objectid, num_bytes, NULL, roots);
2569                 if (ret < 0)
2570                         goto out;
2571         }
2572 out:
2573         if (scratch_leaf) {
2574                 btrfs_tree_read_unlock_blocking(scratch_leaf);
2575                 free_extent_buffer(scratch_leaf);
2576         }
2577         btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2578
2579         return ret;
2580 }
2581
2582 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2583 {
2584         struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
2585                                                      qgroup_rescan_work);
2586         struct btrfs_path *path;
2587         struct btrfs_trans_handle *trans = NULL;
2588         int err = -ENOMEM;
2589         int ret = 0;
2590
2591         path = btrfs_alloc_path();
2592         if (!path)
2593                 goto out;
2594
2595         err = 0;
2596         while (!err && !btrfs_fs_closing(fs_info)) {
2597                 trans = btrfs_start_transaction(fs_info->fs_root, 0);
2598                 if (IS_ERR(trans)) {
2599                         err = PTR_ERR(trans);
2600                         break;
2601                 }
2602                 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
2603                         err = -EINTR;
2604                 } else {
2605                         err = qgroup_rescan_leaf(fs_info, path, trans);
2606                 }
2607                 if (err > 0)
2608                         btrfs_commit_transaction(trans);
2609                 else
2610                         btrfs_end_transaction(trans);
2611         }
2612
2613 out:
2614         btrfs_free_path(path);
2615
2616         mutex_lock(&fs_info->qgroup_rescan_lock);
2617         if (!btrfs_fs_closing(fs_info))
2618                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2619
2620         if (err > 0 &&
2621             fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2622                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2623         } else if (err < 0) {
2624                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2625         }
2626         mutex_unlock(&fs_info->qgroup_rescan_lock);
2627
2628         /*
2629          * only update status, since the previous part has already updated the
2630          * qgroup info.
2631          */
2632         trans = btrfs_start_transaction(fs_info->quota_root, 1);
2633         if (IS_ERR(trans)) {
2634                 err = PTR_ERR(trans);
2635                 btrfs_err(fs_info,
2636                           "fail to start transaction for status update: %d",
2637                           err);
2638                 goto done;
2639         }
2640         ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
2641         if (ret < 0) {
2642                 err = ret;
2643                 btrfs_err(fs_info, "fail to update qgroup status: %d", err);
2644         }
2645         btrfs_end_transaction(trans);
2646
2647         if (btrfs_fs_closing(fs_info)) {
2648                 btrfs_info(fs_info, "qgroup scan paused");
2649         } else if (err >= 0) {
2650                 btrfs_info(fs_info, "qgroup scan completed%s",
2651                         err > 0 ? " (inconsistency flag cleared)" : "");
2652         } else {
2653                 btrfs_err(fs_info, "qgroup scan failed with %d", err);
2654         }
2655
2656 done:
2657         mutex_lock(&fs_info->qgroup_rescan_lock);
2658         fs_info->qgroup_rescan_running = false;
2659         mutex_unlock(&fs_info->qgroup_rescan_lock);
2660         complete_all(&fs_info->qgroup_rescan_completion);
2661 }
2662
2663 /*
2664  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2665  * memory required for the rescan context.
2666  */
2667 static int
2668 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2669                    int init_flags)
2670 {
2671         int ret = 0;
2672
2673         if (!init_flags &&
2674             (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2675              !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2676                 ret = -EINVAL;
2677                 goto err;
2678         }
2679
2680         mutex_lock(&fs_info->qgroup_rescan_lock);
2681         spin_lock(&fs_info->qgroup_lock);
2682
2683         if (init_flags) {
2684                 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2685                         ret = -EINPROGRESS;
2686                 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2687                         ret = -EINVAL;
2688
2689                 if (ret) {
2690                         spin_unlock(&fs_info->qgroup_lock);
2691                         mutex_unlock(&fs_info->qgroup_rescan_lock);
2692                         goto err;
2693                 }
2694                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2695         }
2696
2697         memset(&fs_info->qgroup_rescan_progress, 0,
2698                 sizeof(fs_info->qgroup_rescan_progress));
2699         fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2700         init_completion(&fs_info->qgroup_rescan_completion);
2701         fs_info->qgroup_rescan_running = true;
2702
2703         spin_unlock(&fs_info->qgroup_lock);
2704         mutex_unlock(&fs_info->qgroup_rescan_lock);
2705
2706         memset(&fs_info->qgroup_rescan_work, 0,
2707                sizeof(fs_info->qgroup_rescan_work));
2708         btrfs_init_work(&fs_info->qgroup_rescan_work,
2709                         btrfs_qgroup_rescan_helper,
2710                         btrfs_qgroup_rescan_worker, NULL, NULL);
2711
2712         if (ret) {
2713 err:
2714                 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2715                 return ret;
2716         }
2717
2718         return 0;
2719 }
2720
2721 static void
2722 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2723 {
2724         struct rb_node *n;
2725         struct btrfs_qgroup *qgroup;
2726
2727         spin_lock(&fs_info->qgroup_lock);
2728         /* clear all current qgroup tracking information */
2729         for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2730                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
2731                 qgroup->rfer = 0;
2732                 qgroup->rfer_cmpr = 0;
2733                 qgroup->excl = 0;
2734                 qgroup->excl_cmpr = 0;
2735         }
2736         spin_unlock(&fs_info->qgroup_lock);
2737 }
2738
2739 int
2740 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2741 {
2742         int ret = 0;
2743         struct btrfs_trans_handle *trans;
2744
2745         ret = qgroup_rescan_init(fs_info, 0, 1);
2746         if (ret)
2747                 return ret;
2748
2749         /*
2750          * We have set the rescan_progress to 0, which means no more
2751          * delayed refs will be accounted by btrfs_qgroup_account_ref.
2752          * However, btrfs_qgroup_account_ref may be right after its call
2753          * to btrfs_find_all_roots, in which case it would still do the
2754          * accounting.
2755          * To solve this, we're committing the transaction, which will
2756          * ensure we run all delayed refs and only after that, we are
2757          * going to clear all tracking information for a clean start.
2758          */
2759
2760         trans = btrfs_join_transaction(fs_info->fs_root);
2761         if (IS_ERR(trans)) {
2762                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2763                 return PTR_ERR(trans);
2764         }
2765         ret = btrfs_commit_transaction(trans);
2766         if (ret) {
2767                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2768                 return ret;
2769         }
2770
2771         qgroup_rescan_zero_tracking(fs_info);
2772
2773         btrfs_queue_work(fs_info->qgroup_rescan_workers,
2774                          &fs_info->qgroup_rescan_work);
2775
2776         return 0;
2777 }
2778
2779 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
2780                                      bool interruptible)
2781 {
2782         int running;
2783         int ret = 0;
2784
2785         mutex_lock(&fs_info->qgroup_rescan_lock);
2786         spin_lock(&fs_info->qgroup_lock);
2787         running = fs_info->qgroup_rescan_running;
2788         spin_unlock(&fs_info->qgroup_lock);
2789         mutex_unlock(&fs_info->qgroup_rescan_lock);
2790
2791         if (!running)
2792                 return 0;
2793
2794         if (interruptible)
2795                 ret = wait_for_completion_interruptible(
2796                                         &fs_info->qgroup_rescan_completion);
2797         else
2798                 wait_for_completion(&fs_info->qgroup_rescan_completion);
2799
2800         return ret;
2801 }
2802
2803 /*
2804  * this is only called from open_ctree where we're still single threaded, thus
2805  * locking is omitted here.
2806  */
2807 void
2808 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2809 {
2810         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2811                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
2812                                  &fs_info->qgroup_rescan_work);
2813 }
2814
2815 /*
2816  * Reserve qgroup space for range [start, start + len).
2817  *
2818  * This function will either reserve space from related qgroups or doing
2819  * nothing if the range is already reserved.
2820  *
2821  * Return 0 for successful reserve
2822  * Return <0 for error (including -EQUOT)
2823  *
2824  * NOTE: this function may sleep for memory allocation.
2825  *       if btrfs_qgroup_reserve_data() is called multiple times with
2826  *       same @reserved, caller must ensure when error happens it's OK
2827  *       to free *ALL* reserved space.
2828  */
2829 int btrfs_qgroup_reserve_data(struct inode *inode,
2830                         struct extent_changeset **reserved_ret, u64 start,
2831                         u64 len)
2832 {
2833         struct btrfs_root *root = BTRFS_I(inode)->root;
2834         struct ulist_node *unode;
2835         struct ulist_iterator uiter;
2836         struct extent_changeset *reserved;
2837         u64 orig_reserved;
2838         u64 to_reserve;
2839         int ret;
2840
2841         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
2842             !is_fstree(root->objectid) || len == 0)
2843                 return 0;
2844
2845         /* @reserved parameter is mandatory for qgroup */
2846         if (WARN_ON(!reserved_ret))
2847                 return -EINVAL;
2848         if (!*reserved_ret) {
2849                 *reserved_ret = extent_changeset_alloc();
2850                 if (!*reserved_ret)
2851                         return -ENOMEM;
2852         }
2853         reserved = *reserved_ret;
2854         /* Record already reserved space */
2855         orig_reserved = reserved->bytes_changed;
2856         ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
2857                         start + len -1, EXTENT_QGROUP_RESERVED, reserved);
2858
2859         /* Newly reserved space */
2860         to_reserve = reserved->bytes_changed - orig_reserved;
2861         trace_btrfs_qgroup_reserve_data(inode, start, len,
2862                                         to_reserve, QGROUP_RESERVE);
2863         if (ret < 0)
2864                 goto cleanup;
2865         ret = qgroup_reserve(root, to_reserve, true);
2866         if (ret < 0)
2867                 goto cleanup;
2868
2869         return ret;
2870
2871 cleanup:
2872         /* cleanup *ALL* already reserved ranges */
2873         ULIST_ITER_INIT(&uiter);
2874         while ((unode = ulist_next(&reserved->range_changed, &uiter)))
2875                 clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
2876                                  unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
2877         extent_changeset_release(reserved);
2878         return ret;
2879 }
2880
2881 /* Free ranges specified by @reserved, normally in error path */
2882 static int qgroup_free_reserved_data(struct inode *inode,
2883                         struct extent_changeset *reserved, u64 start, u64 len)
2884 {
2885         struct btrfs_root *root = BTRFS_I(inode)->root;
2886         struct ulist_node *unode;
2887         struct ulist_iterator uiter;
2888         struct extent_changeset changeset;
2889         int freed = 0;
2890         int ret;
2891
2892         extent_changeset_init(&changeset);
2893         len = round_up(start + len, root->fs_info->sectorsize);
2894         start = round_down(start, root->fs_info->sectorsize);
2895
2896         ULIST_ITER_INIT(&uiter);
2897         while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
2898                 u64 range_start = unode->val;
2899                 /* unode->aux is the inclusive end */
2900                 u64 range_len = unode->aux - range_start + 1;
2901                 u64 free_start;
2902                 u64 free_len;
2903
2904                 extent_changeset_release(&changeset);
2905
2906                 /* Only free range in range [start, start + len) */
2907                 if (range_start >= start + len ||
2908                     range_start + range_len <= start)
2909                         continue;
2910                 free_start = max(range_start, start);
2911                 free_len = min(start + len, range_start + range_len) -
2912                            free_start;
2913                 /*
2914                  * TODO: To also modify reserved->ranges_reserved to reflect
2915                  * the modification.
2916                  *
2917                  * However as long as we free qgroup reserved according to
2918                  * EXTENT_QGROUP_RESERVED, we won't double free.
2919                  * So not need to rush.
2920                  */
2921                 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree,
2922                                 free_start, free_start + free_len - 1,
2923                                 EXTENT_QGROUP_RESERVED, &changeset);
2924                 if (ret < 0)
2925                         goto out;
2926                 freed += changeset.bytes_changed;
2927         }
2928         btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed);
2929         ret = freed;
2930 out:
2931         extent_changeset_release(&changeset);
2932         return ret;
2933 }
2934
2935 static int __btrfs_qgroup_release_data(struct inode *inode,
2936                         struct extent_changeset *reserved, u64 start, u64 len,
2937                         int free)
2938 {
2939         struct extent_changeset changeset;
2940         int trace_op = QGROUP_RELEASE;
2941         int ret;
2942
2943         /* In release case, we shouldn't have @reserved */
2944         WARN_ON(!free && reserved);
2945         if (free && reserved)
2946                 return qgroup_free_reserved_data(inode, reserved, start, len);
2947         extent_changeset_init(&changeset);
2948         ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 
2949                         start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
2950         if (ret < 0)
2951                 goto out;
2952
2953         if (free)
2954                 trace_op = QGROUP_FREE;
2955         trace_btrfs_qgroup_release_data(inode, start, len,
2956                                         changeset.bytes_changed, trace_op);
2957         if (free)
2958                 btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
2959                                 BTRFS_I(inode)->root->objectid,
2960                                 changeset.bytes_changed);
2961         ret = changeset.bytes_changed;
2962 out:
2963         extent_changeset_release(&changeset);
2964         return ret;
2965 }
2966
2967 /*
2968  * Free a reserved space range from io_tree and related qgroups
2969  *
2970  * Should be called when a range of pages get invalidated before reaching disk.
2971  * Or for error cleanup case.
2972  * if @reserved is given, only reserved range in [@start, @start + @len) will
2973  * be freed.
2974  *
2975  * For data written to disk, use btrfs_qgroup_release_data().
2976  *
2977  * NOTE: This function may sleep for memory allocation.
2978  */
2979 int btrfs_qgroup_free_data(struct inode *inode,
2980                         struct extent_changeset *reserved, u64 start, u64 len)
2981 {
2982         return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
2983 }
2984
2985 /*
2986  * Release a reserved space range from io_tree only.
2987  *
2988  * Should be called when a range of pages get written to disk and corresponding
2989  * FILE_EXTENT is inserted into corresponding root.
2990  *
2991  * Since new qgroup accounting framework will only update qgroup numbers at
2992  * commit_transaction() time, its reserved space shouldn't be freed from
2993  * related qgroups.
2994  *
2995  * But we should release the range from io_tree, to allow further write to be
2996  * COWed.
2997  *
2998  * NOTE: This function may sleep for memory allocation.
2999  */
3000 int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
3001 {
3002         return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
3003 }
3004
3005 int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
3006                               bool enforce)
3007 {
3008         struct btrfs_fs_info *fs_info = root->fs_info;
3009         int ret;
3010
3011         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
3012             !is_fstree(root->objectid) || num_bytes == 0)
3013                 return 0;
3014
3015         BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
3016         trace_qgroup_meta_reserve(root, (s64)num_bytes);
3017         ret = qgroup_reserve(root, num_bytes, enforce);
3018         if (ret < 0)
3019                 return ret;
3020         atomic64_add(num_bytes, &root->qgroup_meta_rsv);
3021         return ret;
3022 }
3023
3024 void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
3025 {
3026         struct btrfs_fs_info *fs_info = root->fs_info;
3027         u64 reserved;
3028
3029         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
3030             !is_fstree(root->objectid))
3031                 return;
3032
3033         reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
3034         if (reserved == 0)
3035                 return;
3036         trace_qgroup_meta_reserve(root, -(s64)reserved);
3037         btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
3038 }
3039
3040 void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
3041 {
3042         struct btrfs_fs_info *fs_info = root->fs_info;
3043
3044         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
3045             !is_fstree(root->objectid))
3046                 return;
3047
3048         BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
3049         WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
3050         atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
3051         trace_qgroup_meta_reserve(root, -(s64)num_bytes);
3052         btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
3053 }
3054
3055 /*
3056  * Check qgroup reserved space leaking, normally at destroy inode
3057  * time
3058  */
3059 void btrfs_qgroup_check_reserved_leak(struct inode *inode)
3060 {
3061         struct extent_changeset changeset;
3062         struct ulist_node *unode;
3063         struct ulist_iterator iter;
3064         int ret;
3065
3066         extent_changeset_init(&changeset);
3067         ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
3068                         EXTENT_QGROUP_RESERVED, &changeset);
3069
3070         WARN_ON(ret < 0);
3071         if (WARN_ON(changeset.bytes_changed)) {
3072                 ULIST_ITER_INIT(&iter);
3073                 while ((unode = ulist_next(&changeset.range_changed, &iter))) {
3074                         btrfs_warn(BTRFS_I(inode)->root->fs_info,
3075                                 "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
3076                                 inode->i_ino, unode->val, unode->aux);
3077                 }
3078                 btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
3079                                 BTRFS_I(inode)->root->objectid,
3080                                 changeset.bytes_changed);
3081
3082         }
3083         extent_changeset_release(&changeset);
3084 }