btrfs: qgroup: Fix wrong qgroup reservation update for relationship modification
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include <linux/workqueue.h>
26 #include <linux/btrfs.h>
27
28 #include "ctree.h"
29 #include "transaction.h"
30 #include "disk-io.h"
31 #include "locking.h"
32 #include "ulist.h"
33 #include "backref.h"
34 #include "extent_io.h"
35 #include "qgroup.h"
36
37
38 /* TODO XXX FIXME
39  *  - subvol delete -> delete when ref goes to 0? delete limits also?
40  *  - reorganize keys
41  *  - compressed
42  *  - sync
43  *  - copy also limits on subvol creation
44  *  - limit
45  *  - caches for ulists
46  *  - performance benchmarks
47  *  - check all ioctl parameters
48  */
49
50 /*
51  * Helpers to access qgroup reservation
52  *
53  * Callers should ensure the lock context and type are valid
54  */
55
56 static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup)
57 {
58         u64 ret = 0;
59         int i;
60
61         for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
62                 ret += qgroup->rsv.values[i];
63
64         return ret;
65 }
66
67 #ifdef CONFIG_BTRFS_DEBUG
68 static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type)
69 {
70         if (type == BTRFS_QGROUP_RSV_DATA)
71                 return "data";
72         if (type == BTRFS_QGROUP_RSV_META)
73                 return "meta";
74         return NULL;
75 }
76 #endif
77
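/*
 * qgroup_rsv_add()/qgroup_rsv_release() adjust one reservation type;
 * release clamps at zero (with a ratelimited warning under
 * CONFIG_BTRFS_DEBUG) rather than underflowing.  A minimal usage sketch,
 * assuming the caller holds fs_info->qgroup_lock and @qg is a valid
 * qgroup (hypothetical caller code):
 *
 *	qgroup_rsv_add(qg, SZ_16K, BTRFS_QGROUP_RSV_DATA);
 *	...
 *	qgroup_rsv_release(qg, SZ_16K, BTRFS_QGROUP_RSV_DATA);
 */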
78 static void qgroup_rsv_add(struct btrfs_qgroup *qgroup, u64 num_bytes,
79                            enum btrfs_qgroup_rsv_type type)
80 {
81         qgroup->rsv.values[type] += num_bytes;
82 }
83
84 static void qgroup_rsv_release(struct btrfs_qgroup *qgroup, u64 num_bytes,
85                                enum btrfs_qgroup_rsv_type type)
86 {
87         if (qgroup->rsv.values[type] >= num_bytes) {
88                 qgroup->rsv.values[type] -= num_bytes;
89                 return;
90         }
91 #ifdef CONFIG_BTRFS_DEBUG
92         WARN_RATELIMIT(1,
93                 "qgroup %llu %s reserved space underflow, have %llu to free %llu",
94                 qgroup->qgroupid, qgroup_rsv_type_str(type),
95                 qgroup->rsv.values[type], num_bytes);
96 #endif
97         qgroup->rsv.values[type] = 0;
98 }
99
100 static void qgroup_rsv_add_by_qgroup(struct btrfs_qgroup *dest,
101                                           struct btrfs_qgroup *src)
102 {
103         int i;
104
105         for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
106                 qgroup_rsv_add(dest, src->rsv.values[i], i);
107 }
108
109 static void qgroup_rsv_release_by_qgroup(struct btrfs_qgroup *dest,
110                                           struct btrfs_qgroup *src)
111 {
112         int i;
113
114         for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
115                 qgroup_rsv_release(dest, src->rsv.values[i], i);
116 }
117
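/*
 * old_refcnt/new_refcnt are stored relative to a sequence number that is
 * bumped for every accounting run.  A stored refcnt below the current seq
 * is stale from an earlier run and reads back as 0, so qgroups never need
 * to be cleared between runs.  E.g. with seq == 100 and old_refcnt == 98,
 * btrfs_qgroup_get_old_refcnt() returns 0; after one update with mod == 1
 * it returns 1 (old_refcnt becomes 101).
 */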
118 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
119                                            int mod)
120 {
121         if (qg->old_refcnt < seq)
122                 qg->old_refcnt = seq;
123         qg->old_refcnt += mod;
124 }
125
126 static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
127                                            int mod)
128 {
129         if (qg->new_refcnt < seq)
130                 qg->new_refcnt = seq;
131         qg->new_refcnt += mod;
132 }
133
134 static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
135 {
136         if (qg->old_refcnt < seq)
137                 return 0;
138         return qg->old_refcnt - seq;
139 }
140
141 static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
142 {
143         if (qg->new_refcnt < seq)
144                 return 0;
145         return qg->new_refcnt - seq;
146 }
147
148 /*
149  * Glue structure to represent the relations between qgroups; one entry links a member qgroup to a parent group.
150  */
151 struct btrfs_qgroup_list {
152         struct list_head next_group;
153         struct list_head next_member;
154         struct btrfs_qgroup *group;
155         struct btrfs_qgroup *member;
156 };
157
158 static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg)
159 {
160         return (u64)(uintptr_t)qg;
161 }
162
163 static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n)
164 {
165         return (struct btrfs_qgroup *)(uintptr_t)n->aux;
166 }
167
168 static int
169 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
170                    int init_flags);
171 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
172
173 /* must be called with qgroup_ioctl_lock held */
174 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
175                                            u64 qgroupid)
176 {
177         struct rb_node *n = fs_info->qgroup_tree.rb_node;
178         struct btrfs_qgroup *qgroup;
179
180         while (n) {
181                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
182                 if (qgroup->qgroupid < qgroupid)
183                         n = n->rb_left;
184                 else if (qgroup->qgroupid > qgroupid)
185                         n = n->rb_right;
186                 else
187                         return qgroup;
188         }
189         return NULL;
190 }
191
192 /* must be called with qgroup_lock held */
193 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
194                                           u64 qgroupid)
195 {
196         struct rb_node **p = &fs_info->qgroup_tree.rb_node;
197         struct rb_node *parent = NULL;
198         struct btrfs_qgroup *qgroup;
199
200         while (*p) {
201                 parent = *p;
202                 qgroup = rb_entry(parent, struct btrfs_qgroup, node);
203
204                 if (qgroup->qgroupid < qgroupid)
205                         p = &(*p)->rb_left;
206                 else if (qgroup->qgroupid > qgroupid)
207                         p = &(*p)->rb_right;
208                 else
209                         return qgroup;
210         }
211
212         qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
213         if (!qgroup)
214                 return ERR_PTR(-ENOMEM);
215
216         qgroup->qgroupid = qgroupid;
217         INIT_LIST_HEAD(&qgroup->groups);
218         INIT_LIST_HEAD(&qgroup->members);
219         INIT_LIST_HEAD(&qgroup->dirty);
220
221         rb_link_node(&qgroup->node, parent, p);
222         rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
223
224         return qgroup;
225 }
226
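/*
 * Unlink @qgroup from all parent and member relation lists and free it.
 * Callers must already have erased it from the rbtree (del_qgroup_rb()),
 * or be tearing down the whole tree as in btrfs_free_qgroup_config().
 */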
227 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
228 {
229         struct btrfs_qgroup_list *list;
230
231         list_del(&qgroup->dirty);
232         while (!list_empty(&qgroup->groups)) {
233                 list = list_first_entry(&qgroup->groups,
234                                         struct btrfs_qgroup_list, next_group);
235                 list_del(&list->next_group);
236                 list_del(&list->next_member);
237                 kfree(list);
238         }
239
240         while (!list_empty(&qgroup->members)) {
241                 list = list_first_entry(&qgroup->members,
242                                         struct btrfs_qgroup_list, next_member);
243                 list_del(&list->next_group);
244                 list_del(&list->next_member);
245                 kfree(list);
246         }
247         kfree(qgroup);
248 }
249
250 /* must be called with qgroup_lock held */
251 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
252 {
253         struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
254
255         if (!qgroup)
256                 return -ENOENT;
257
258         rb_erase(&qgroup->node, &fs_info->qgroup_tree);
259         __del_qgroup_rb(qgroup);
260         return 0;
261 }
262
263 /* must be called with qgroup_lock held */
264 static int add_relation_rb(struct btrfs_fs_info *fs_info,
265                            u64 memberid, u64 parentid)
266 {
267         struct btrfs_qgroup *member;
268         struct btrfs_qgroup *parent;
269         struct btrfs_qgroup_list *list;
270
271         member = find_qgroup_rb(fs_info, memberid);
272         parent = find_qgroup_rb(fs_info, parentid);
273         if (!member || !parent)
274                 return -ENOENT;
275
276         list = kzalloc(sizeof(*list), GFP_ATOMIC);
277         if (!list)
278                 return -ENOMEM;
279
280         list->group = parent;
281         list->member = member;
282         list_add_tail(&list->next_group, &member->groups);
283         list_add_tail(&list->next_member, &parent->members);
284
285         return 0;
286 }
287
288 /* must be called with qgroup_lock held */
289 static int del_relation_rb(struct btrfs_fs_info *fs_info,
290                            u64 memberid, u64 parentid)
291 {
292         struct btrfs_qgroup *member;
293         struct btrfs_qgroup *parent;
294         struct btrfs_qgroup_list *list;
295
296         member = find_qgroup_rb(fs_info, memberid);
297         parent = find_qgroup_rb(fs_info, parentid);
298         if (!member || !parent)
299                 return -ENOENT;
300
301         list_for_each_entry(list, &member->groups, next_group) {
302                 if (list->group == parent) {
303                         list_del(&list->next_group);
304                         list_del(&list->next_member);
305                         kfree(list);
306                         return 0;
307                 }
308         }
309         return -ENOENT;
310 }
311
312 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
313 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
314                                u64 rfer, u64 excl)
315 {
316         struct btrfs_qgroup *qgroup;
317
318         qgroup = find_qgroup_rb(fs_info, qgroupid);
319         if (!qgroup)
320                 return -EINVAL;
321         if (qgroup->rfer != rfer || qgroup->excl != excl)
322                 return -EINVAL;
323         return 0;
324 }
325 #endif
326
327 /*
328  * The full config is read in one go; this is only called from open_ctree().
329  * It doesn't take any locks, as at this point we're still single-threaded.
330  */
331 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
332 {
333         struct btrfs_key key;
334         struct btrfs_key found_key;
335         struct btrfs_root *quota_root = fs_info->quota_root;
336         struct btrfs_path *path = NULL;
337         struct extent_buffer *l;
338         int slot;
339         int ret = 0;
340         u64 flags = 0;
341         u64 rescan_progress = 0;
342
343         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
344                 return 0;
345
346         fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
347         if (!fs_info->qgroup_ulist) {
348                 ret = -ENOMEM;
349                 goto out;
350         }
351
352         path = btrfs_alloc_path();
353         if (!path) {
354                 ret = -ENOMEM;
355                 goto out;
356         }
357
358         /* default this to quota off, in case no status key is found */
359         fs_info->qgroup_flags = 0;
360
361         /*
362          * pass 1: read status, all qgroup infos and limits
363          */
364         key.objectid = 0;
365         key.type = 0;
366         key.offset = 0;
367         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
368         if (ret)
369                 goto out;
370
371         while (1) {
372                 struct btrfs_qgroup *qgroup;
373
374                 slot = path->slots[0];
375                 l = path->nodes[0];
376                 btrfs_item_key_to_cpu(l, &found_key, slot);
377
378                 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
379                         struct btrfs_qgroup_status_item *ptr;
380
381                         ptr = btrfs_item_ptr(l, slot,
382                                              struct btrfs_qgroup_status_item);
383
384                         if (btrfs_qgroup_status_version(l, ptr) !=
385                             BTRFS_QGROUP_STATUS_VERSION) {
386                                 btrfs_err(fs_info,
387                                  "old qgroup version, quota disabled");
388                                 goto out;
389                         }
390                         if (btrfs_qgroup_status_generation(l, ptr) !=
391                             fs_info->generation) {
392                                 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
393                                 btrfs_err(fs_info,
394                                         "qgroup generation mismatch, marked as inconsistent");
395                         }
396                         fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
397                                                                           ptr);
398                         rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
399                         goto next1;
400                 }
401
402                 if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
403                     found_key.type != BTRFS_QGROUP_LIMIT_KEY)
404                         goto next1;
405
406                 qgroup = find_qgroup_rb(fs_info, found_key.offset);
407                 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
408                     (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
409                         btrfs_err(fs_info, "inconsistent qgroup config");
410                         flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
411                 }
412                 if (!qgroup) {
413                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
414                         if (IS_ERR(qgroup)) {
415                                 ret = PTR_ERR(qgroup);
416                                 goto out;
417                         }
418                 }
419                 switch (found_key.type) {
420                 case BTRFS_QGROUP_INFO_KEY: {
421                         struct btrfs_qgroup_info_item *ptr;
422
423                         ptr = btrfs_item_ptr(l, slot,
424                                              struct btrfs_qgroup_info_item);
425                         qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
426                         qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
427                         qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
428                         qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
429                         /* generation currently unused */
430                         break;
431                 }
432                 case BTRFS_QGROUP_LIMIT_KEY: {
433                         struct btrfs_qgroup_limit_item *ptr;
434
435                         ptr = btrfs_item_ptr(l, slot,
436                                              struct btrfs_qgroup_limit_item);
437                         qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
438                         qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
439                         qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
440                         qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
441                         qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
442                         break;
443                 }
444                 }
445 next1:
446                 ret = btrfs_next_item(quota_root, path);
447                 if (ret < 0)
448                         goto out;
449                 if (ret)
450                         break;
451         }
452         btrfs_release_path(path);
453
454         /*
455          * pass 2: read all qgroup relations
456          */
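	/*
	 * Each relation is stored as two items, (src, dst) and (dst, src);
	 * only the member -> parent direction is needed to rebuild the
	 * in-memory lists, so the other one is skipped below.
	 */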
457         key.objectid = 0;
458         key.type = BTRFS_QGROUP_RELATION_KEY;
459         key.offset = 0;
460         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
461         if (ret)
462                 goto out;
463         while (1) {
464                 slot = path->slots[0];
465                 l = path->nodes[0];
466                 btrfs_item_key_to_cpu(l, &found_key, slot);
467
468                 if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
469                         goto next2;
470
471                 if (found_key.objectid > found_key.offset) {
472                         /* parent <- member, not needed to build config */
473                         /* FIXME should we omit the key completely? */
474                         goto next2;
475                 }
476
477                 ret = add_relation_rb(fs_info, found_key.objectid,
478                                       found_key.offset);
479                 if (ret == -ENOENT) {
480                         btrfs_warn(fs_info,
481                                 "orphan qgroup relation 0x%llx->0x%llx",
482                                 found_key.objectid, found_key.offset);
483                         ret = 0;        /* ignore the error */
484                 }
485                 if (ret)
486                         goto out;
487 next2:
488                 ret = btrfs_next_item(quota_root, path);
489                 if (ret < 0)
490                         goto out;
491                 if (ret)
492                         break;
493         }
494 out:
495         fs_info->qgroup_flags |= flags;
496         if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
497                 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
498         else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
499                  ret >= 0)
500                 ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
501         btrfs_free_path(path);
502
503         if (ret < 0) {
504                 ulist_free(fs_info->qgroup_ulist);
505                 fs_info->qgroup_ulist = NULL;
506                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
507         }
508
509         return ret < 0 ? ret : 0;
510 }
511
512 /*
513  * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
514  * the first two of which run single-threaded. For the third, we have already
515  * set quota_root to NULL with qgroup_lock held, so it is safe to clean
516  * up the in-memory structures without qgroup_lock held.
517  */
518 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
519 {
520         struct rb_node *n;
521         struct btrfs_qgroup *qgroup;
522
523         while ((n = rb_first(&fs_info->qgroup_tree))) {
524                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
525                 rb_erase(n, &fs_info->qgroup_tree);
526                 __del_qgroup_rb(qgroup);
527         }
528         /*
529          * We call btrfs_free_qgroup_config() when unmounting the
530          * filesystem and when disabling quota, so set qgroup_ulist
531          * to NULL here to avoid a double free.
532          */
533         ulist_free(fs_info->qgroup_ulist);
534         fs_info->qgroup_ulist = NULL;
535 }
536
537 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
538                                     struct btrfs_root *quota_root,
539                                     u64 src, u64 dst)
540 {
541         int ret;
542         struct btrfs_path *path;
543         struct btrfs_key key;
544
545         path = btrfs_alloc_path();
546         if (!path)
547                 return -ENOMEM;
548
549         key.objectid = src;
550         key.type = BTRFS_QGROUP_RELATION_KEY;
551         key.offset = dst;
552
553         ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
554
555         btrfs_mark_buffer_dirty(path->nodes[0]);
556
557         btrfs_free_path(path);
558         return ret;
559 }
560
561 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
562                                     struct btrfs_root *quota_root,
563                                     u64 src, u64 dst)
564 {
565         int ret;
566         struct btrfs_path *path;
567         struct btrfs_key key;
568
569         path = btrfs_alloc_path();
570         if (!path)
571                 return -ENOMEM;
572
573         key.objectid = src;
574         key.type = BTRFS_QGROUP_RELATION_KEY;
575         key.offset = dst;
576
577         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
578         if (ret < 0)
579                 goto out;
580
581         if (ret > 0) {
582                 ret = -ENOENT;
583                 goto out;
584         }
585
586         ret = btrfs_del_item(trans, quota_root, path);
587 out:
588         btrfs_free_path(path);
589         return ret;
590 }
591
592 static int add_qgroup_item(struct btrfs_trans_handle *trans,
593                            struct btrfs_root *quota_root, u64 qgroupid)
594 {
595         int ret;
596         struct btrfs_path *path;
597         struct btrfs_qgroup_info_item *qgroup_info;
598         struct btrfs_qgroup_limit_item *qgroup_limit;
599         struct extent_buffer *leaf;
600         struct btrfs_key key;
601
602         if (btrfs_is_testing(quota_root->fs_info))
603                 return 0;
604
605         path = btrfs_alloc_path();
606         if (!path)
607                 return -ENOMEM;
608
609         key.objectid = 0;
610         key.type = BTRFS_QGROUP_INFO_KEY;
611         key.offset = qgroupid;
612
613         /*
614          * Avoid a transaction abort by catching -EEXIST here. In that
615          * case, we proceed by re-initializing the existing structure
616          * on disk.
617          */
618
619         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
620                                       sizeof(*qgroup_info));
621         if (ret && ret != -EEXIST)
622                 goto out;
623
624         leaf = path->nodes[0];
625         qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
626                                  struct btrfs_qgroup_info_item);
627         btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
628         btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
629         btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
630         btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
631         btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
632
633         btrfs_mark_buffer_dirty(leaf);
634
635         btrfs_release_path(path);
636
637         key.type = BTRFS_QGROUP_LIMIT_KEY;
638         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
639                                       sizeof(*qgroup_limit));
640         if (ret && ret != -EEXIST)
641                 goto out;
642
643         leaf = path->nodes[0];
644         qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
645                                   struct btrfs_qgroup_limit_item);
646         btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
647         btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
648         btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
649         btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
650         btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
651
652         btrfs_mark_buffer_dirty(leaf);
653
654         ret = 0;
655 out:
656         btrfs_free_path(path);
657         return ret;
658 }
659
660 static int del_qgroup_item(struct btrfs_trans_handle *trans,
661                            struct btrfs_root *quota_root, u64 qgroupid)
662 {
663         int ret;
664         struct btrfs_path *path;
665         struct btrfs_key key;
666
667         path = btrfs_alloc_path();
668         if (!path)
669                 return -ENOMEM;
670
671         key.objectid = 0;
672         key.type = BTRFS_QGROUP_INFO_KEY;
673         key.offset = qgroupid;
674         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
675         if (ret < 0)
676                 goto out;
677
678         if (ret > 0) {
679                 ret = -ENOENT;
680                 goto out;
681         }
682
683         ret = btrfs_del_item(trans, quota_root, path);
684         if (ret)
685                 goto out;
686
687         btrfs_release_path(path);
688
689         key.type = BTRFS_QGROUP_LIMIT_KEY;
690         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
691         if (ret < 0)
692                 goto out;
693
694         if (ret > 0) {
695                 ret = -ENOENT;
696                 goto out;
697         }
698
699         ret = btrfs_del_item(trans, quota_root, path);
700
701 out:
702         btrfs_free_path(path);
703         return ret;
704 }
705
706 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
707                                     struct btrfs_root *root,
708                                     struct btrfs_qgroup *qgroup)
709 {
710         struct btrfs_path *path;
711         struct btrfs_key key;
712         struct extent_buffer *l;
713         struct btrfs_qgroup_limit_item *qgroup_limit;
714         int ret;
715         int slot;
716
717         key.objectid = 0;
718         key.type = BTRFS_QGROUP_LIMIT_KEY;
719         key.offset = qgroup->qgroupid;
720
721         path = btrfs_alloc_path();
722         if (!path)
723                 return -ENOMEM;
724
725         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
726         if (ret > 0)
727                 ret = -ENOENT;
728
729         if (ret)
730                 goto out;
731
732         l = path->nodes[0];
733         slot = path->slots[0];
734         qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
735         btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
736         btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
737         btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
738         btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
739         btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
740
741         btrfs_mark_buffer_dirty(l);
742
743 out:
744         btrfs_free_path(path);
745         return ret;
746 }
747
748 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
749                                    struct btrfs_root *root,
750                                    struct btrfs_qgroup *qgroup)
751 {
752         struct btrfs_path *path;
753         struct btrfs_key key;
754         struct extent_buffer *l;
755         struct btrfs_qgroup_info_item *qgroup_info;
756         int ret;
757         int slot;
758
759         if (btrfs_is_testing(root->fs_info))
760                 return 0;
761
762         key.objectid = 0;
763         key.type = BTRFS_QGROUP_INFO_KEY;
764         key.offset = qgroup->qgroupid;
765
766         path = btrfs_alloc_path();
767         if (!path)
768                 return -ENOMEM;
769
770         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
771         if (ret > 0)
772                 ret = -ENOENT;
773
774         if (ret)
775                 goto out;
776
777         l = path->nodes[0];
778         slot = path->slots[0];
779         qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
780         btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
781         btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
782         btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
783         btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
784         btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
785
786         btrfs_mark_buffer_dirty(l);
787
788 out:
789         btrfs_free_path(path);
790         return ret;
791 }
792
793 static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
794                                      struct btrfs_fs_info *fs_info,
795                                     struct btrfs_root *root)
796 {
797         struct btrfs_path *path;
798         struct btrfs_key key;
799         struct extent_buffer *l;
800         struct btrfs_qgroup_status_item *ptr;
801         int ret;
802         int slot;
803
804         key.objectid = 0;
805         key.type = BTRFS_QGROUP_STATUS_KEY;
806         key.offset = 0;
807
808         path = btrfs_alloc_path();
809         if (!path)
810                 return -ENOMEM;
811
812         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
813         if (ret > 0)
814                 ret = -ENOENT;
815
816         if (ret)
817                 goto out;
818
819         l = path->nodes[0];
820         slot = path->slots[0];
821         ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
822         btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
823         btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
824         btrfs_set_qgroup_status_rescan(l, ptr,
825                                 fs_info->qgroup_rescan_progress.objectid);
826
827         btrfs_mark_buffer_dirty(l);
828
829 out:
830         btrfs_free_path(path);
831         return ret;
832 }
833
834 /*
835  * called with qgroup_lock held
836  */
837 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
838                                   struct btrfs_root *root)
839 {
840         struct btrfs_path *path;
841         struct btrfs_key key;
842         struct extent_buffer *leaf = NULL;
843         int ret;
844         int nr = 0;
845
846         path = btrfs_alloc_path();
847         if (!path)
848                 return -ENOMEM;
849
850         path->leave_spinning = 1;
851
852         key.objectid = 0;
853         key.offset = 0;
854         key.type = 0;
855
856         while (1) {
857                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
858                 if (ret < 0)
859                         goto out;
860                 leaf = path->nodes[0];
861                 nr = btrfs_header_nritems(leaf);
862                 if (!nr)
863                         break;
864                 /*
865                  * delete the leaves one by one
866                  * since the whole tree is going
867                  * to be deleted.
868                  */
869                 path->slots[0] = 0;
870                 ret = btrfs_del_items(trans, root, path, 0, nr);
871                 if (ret)
872                         goto out;
873
874                 btrfs_release_path(path);
875         }
876         ret = 0;
877 out:
878         btrfs_free_path(path);
879         return ret;
880 }
881
882 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
883                        struct btrfs_fs_info *fs_info)
884 {
885         struct btrfs_root *quota_root;
886         struct btrfs_root *tree_root = fs_info->tree_root;
887         struct btrfs_path *path = NULL;
888         struct btrfs_qgroup_status_item *ptr;
889         struct extent_buffer *leaf;
890         struct btrfs_key key;
891         struct btrfs_key found_key;
892         struct btrfs_qgroup *qgroup = NULL;
893         int ret = 0;
894         int slot;
895
896         mutex_lock(&fs_info->qgroup_ioctl_lock);
897         if (fs_info->quota_root)
898                 goto out;
899
900         fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
901         if (!fs_info->qgroup_ulist) {
902                 ret = -ENOMEM;
903                 goto out;
904         }
905
906         /*
907          * initially create the quota tree
908          */
909         quota_root = btrfs_create_tree(trans, fs_info,
910                                        BTRFS_QUOTA_TREE_OBJECTID);
911         if (IS_ERR(quota_root)) {
912                 ret =  PTR_ERR(quota_root);
913                 goto out;
914         }
915
916         path = btrfs_alloc_path();
917         if (!path) {
918                 ret = -ENOMEM;
919                 goto out_free_root;
920         }
921
922         key.objectid = 0;
923         key.type = BTRFS_QGROUP_STATUS_KEY;
924         key.offset = 0;
925
926         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
927                                       sizeof(*ptr));
928         if (ret)
929                 goto out_free_path;
930
931         leaf = path->nodes[0];
932         ptr = btrfs_item_ptr(leaf, path->slots[0],
933                                  struct btrfs_qgroup_status_item);
934         btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
935         btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
936         fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
937                                 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
938         btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
939         btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
940
941         btrfs_mark_buffer_dirty(leaf);
942
943         key.objectid = 0;
944         key.type = BTRFS_ROOT_REF_KEY;
945         key.offset = 0;
946
947         btrfs_release_path(path);
948         ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
949         if (ret > 0)
950                 goto out_add_root;
951         if (ret < 0)
952                 goto out_free_path;
953
954
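	/*
	 * Walk every ROOT_REF item in the root tree and create a qgroup
	 * item plus an in-memory qgroup for each existing subvolume.
	 */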
955         while (1) {
956                 slot = path->slots[0];
957                 leaf = path->nodes[0];
958                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
959
960                 if (found_key.type == BTRFS_ROOT_REF_KEY) {
961                         ret = add_qgroup_item(trans, quota_root,
962                                               found_key.offset);
963                         if (ret)
964                                 goto out_free_path;
965
966                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
967                         if (IS_ERR(qgroup)) {
968                                 ret = PTR_ERR(qgroup);
969                                 goto out_free_path;
970                         }
971                 }
972                 ret = btrfs_next_item(tree_root, path);
973                 if (ret < 0)
974                         goto out_free_path;
975                 if (ret)
976                         break;
977         }
978
979 out_add_root:
980         btrfs_release_path(path);
981         ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
982         if (ret)
983                 goto out_free_path;
984
985         qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
986         if (IS_ERR(qgroup)) {
987                 ret = PTR_ERR(qgroup);
988                 goto out_free_path;
989         }
990         spin_lock(&fs_info->qgroup_lock);
991         fs_info->quota_root = quota_root;
992         set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
993         spin_unlock(&fs_info->qgroup_lock);
994         ret = qgroup_rescan_init(fs_info, 0, 1);
995         if (!ret) {
996                 qgroup_rescan_zero_tracking(fs_info);
997                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
998                                  &fs_info->qgroup_rescan_work);
999         }
1000
1001 out_free_path:
1002         btrfs_free_path(path);
1003 out_free_root:
1004         if (ret) {
1005                 free_extent_buffer(quota_root->node);
1006                 free_extent_buffer(quota_root->commit_root);
1007                 kfree(quota_root);
1008         }
1009 out:
1010         if (ret) {
1011                 ulist_free(fs_info->qgroup_ulist);
1012                 fs_info->qgroup_ulist = NULL;
1013         }
1014         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1015         return ret;
1016 }
1017
1018 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
1019                         struct btrfs_fs_info *fs_info)
1020 {
1021         struct btrfs_root *quota_root;
1022         int ret = 0;
1023
1024         mutex_lock(&fs_info->qgroup_ioctl_lock);
1025         if (!fs_info->quota_root)
1026                 goto out;
1027         clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
1028         btrfs_qgroup_wait_for_completion(fs_info, false);
1029         spin_lock(&fs_info->qgroup_lock);
1030         quota_root = fs_info->quota_root;
1031         fs_info->quota_root = NULL;
1032         fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
1033         spin_unlock(&fs_info->qgroup_lock);
1034
1035         btrfs_free_qgroup_config(fs_info);
1036
1037         ret = btrfs_clean_quota_tree(trans, quota_root);
1038         if (ret)
1039                 goto out;
1040
1041         ret = btrfs_del_root(trans, fs_info, &quota_root->root_key);
1042         if (ret)
1043                 goto out;
1044
1045         list_del(&quota_root->dirty_list);
1046
1047         btrfs_tree_lock(quota_root->node);
1048         clean_tree_block(fs_info, quota_root->node);
1049         btrfs_tree_unlock(quota_root->node);
1050         btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
1051
1052         free_extent_buffer(quota_root->node);
1053         free_extent_buffer(quota_root->commit_root);
1054         kfree(quota_root);
1055 out:
1056         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1057         return ret;
1058 }
1059
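/* Mark @qgroup dirty so its on-disk items are updated at commit time. */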
1060 static void qgroup_dirty(struct btrfs_fs_info *fs_info,
1061                          struct btrfs_qgroup *qgroup)
1062 {
1063         if (list_empty(&qgroup->dirty))
1064                 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
1065 }
1066
1067 static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
1068                                       struct btrfs_qgroup *qgroup,
1069                                       u64 num_bytes)
1070 {
1071 #ifdef CONFIG_BTRFS_DEBUG
1072         WARN_ON(qgroup->reserved < num_bytes);
1073         btrfs_debug(fs_info,
1074                 "qgroup %llu reserved space underflow, have: %llu, to free: %llu",
1075                 qgroup->qgroupid, qgroup->reserved, num_bytes);
1076 #endif
1077         qgroup->reserved = 0;
1078 }
1079
1080 /*
1081  * The easy accounting: we're updating a qgroup relationship whose child
1082  * qgroup only has exclusive extents.
1083  *
1084  * In this case, all exclusive extents are also exclusive for the parent, so
1085  * excl/rfer just get added/removed.
1086  *
1087  * The same goes for qgroup reservation space, which must also be added
1088  * to/removed from the parent.
1089  * Otherwise, when the child later releases its reservation, the parent would
1090  * underflow its own reservation (in the relationship-adding case).
1091  *
1092  * Caller should hold fs_info->qgroup_lock.
1093  */
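/*
 * A worked example with hypothetical numbers: removing (sign == -1) a
 * child whose excl == rfer == 16K from a parent drops the parent's
 * rfer/excl by 16K and releases every reservation type the child holds
 * from the parent too, repeating the same adjustment for all ancestors
 * collected in @tmp.
 */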
1094 static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1095                                     struct ulist *tmp, u64 ref_root,
1096                                     struct btrfs_qgroup *src, int sign)
1097 {
1098         struct btrfs_qgroup *qgroup;
1099         struct btrfs_qgroup_list *glist;
1100         struct ulist_node *unode;
1101         struct ulist_iterator uiter;
1102         u64 num_bytes = src->excl;
1103         int ret = 0;
1104
1105         qgroup = find_qgroup_rb(fs_info, ref_root);
1106         if (!qgroup)
1107                 goto out;
1108
1109         qgroup->rfer += sign * num_bytes;
1110         qgroup->rfer_cmpr += sign * num_bytes;
1111
1112         WARN_ON(sign < 0 && qgroup->excl < num_bytes);
1113         qgroup->excl += sign * num_bytes;
1114         qgroup->excl_cmpr += sign * num_bytes;
1115
1116         if (sign > 0)
1117                 qgroup_rsv_add_by_qgroup(qgroup, src);
1118         else
1119                 qgroup_rsv_release_by_qgroup(qgroup, src);
1120
1121         qgroup_dirty(fs_info, qgroup);
1122
1123         /* Get all of the parent groups that contain this qgroup */
1124         list_for_each_entry(glist, &qgroup->groups, next_group) {
1125                 ret = ulist_add(tmp, glist->group->qgroupid,
1126                                 qgroup_to_aux(glist->group), GFP_ATOMIC);
1127                 if (ret < 0)
1128                         goto out;
1129         }
1130
1131         /* Iterate all of the parents and adjust their reference counts */
1132         ULIST_ITER_INIT(&uiter);
1133         while ((unode = ulist_next(tmp, &uiter))) {
1134                 qgroup = unode_aux_to_qgroup(unode);
1135                 qgroup->rfer += sign * num_bytes;
1136                 qgroup->rfer_cmpr += sign * num_bytes;
1137                 WARN_ON(sign < 0 && qgroup->excl < num_bytes);
1138                 qgroup->excl += sign * num_bytes;
1139                 if (sign > 0)
1140                         qgroup_rsv_add_by_qgroup(qgroup, src);
1141                 else
1142                         qgroup_rsv_release_by_qgroup(qgroup, src);
1143                 qgroup->excl_cmpr += sign * num_bytes;
1144                 qgroup_dirty(fs_info, qgroup);
1145
1146                 /* Add any parents of the parents */
1147                 list_for_each_entry(glist, &qgroup->groups, next_group) {
1148                         ret = ulist_add(tmp, glist->group->qgroupid,
1149                                         qgroup_to_aux(glist->group), GFP_ATOMIC);
1150                         if (ret < 0)
1151                                 goto out;
1152                 }
1153         }
1154         ret = 0;
1155 out:
1156         return ret;
1157 }
1158
1159
1160 /*
1161  * Quick path for updating qgroup with only excl refs.
1162  *
1163  * In that case, updating all parents is enough.
1164  * Otherwise we need to do a full rescan.
1165  * Caller should also hold fs_info->qgroup_lock.
1166  *
1167  * Return 0 for a quick update; return >0 if a full rescan is needed
1168  * (the INCONSISTENT flag is set in that case).
1169  * Return <0 for other errors.
1170  */
1171 static int quick_update_accounting(struct btrfs_fs_info *fs_info,
1172                                    struct ulist *tmp, u64 src, u64 dst,
1173                                    int sign)
1174 {
1175         struct btrfs_qgroup *qgroup;
1176         int ret = 1;
1177         int err = 0;
1178
1179         qgroup = find_qgroup_rb(fs_info, src);
1180         if (!qgroup)
1181                 goto out;
1182         if (qgroup->excl == qgroup->rfer) {
1183                 ret = 0;
1184                 err = __qgroup_excl_accounting(fs_info, tmp, dst,
1185                                                qgroup, sign);
1186                 if (err < 0) {
1187                         ret = err;
1188                         goto out;
1189                 }
1190         }
1191 out:
1192         if (ret)
1193                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1194         return ret;
1195 }
1196
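/*
 * Add a relation between member qgroup @src and parent qgroup @dst.  Both
 * directed relation items are inserted into the quota tree, and when the
 * member's extents are all exclusive the accounting and reserved space
 * are propagated to the parents via quick_update_accounting().
 */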
1197 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1198                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1199 {
1200         struct btrfs_root *quota_root;
1201         struct btrfs_qgroup *parent;
1202         struct btrfs_qgroup *member;
1203         struct btrfs_qgroup_list *list;
1204         struct ulist *tmp;
1205         int ret = 0;
1206
1207         /* Check the level of src and dst first */
1208         if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
1209                 return -EINVAL;
1210
1211         tmp = ulist_alloc(GFP_KERNEL);
1212         if (!tmp)
1213                 return -ENOMEM;
1214
1215         mutex_lock(&fs_info->qgroup_ioctl_lock);
1216         quota_root = fs_info->quota_root;
1217         if (!quota_root) {
1218                 ret = -EINVAL;
1219                 goto out;
1220         }
1221         member = find_qgroup_rb(fs_info, src);
1222         parent = find_qgroup_rb(fs_info, dst);
1223         if (!member || !parent) {
1224                 ret = -EINVAL;
1225                 goto out;
1226         }
1227
1228         /* First check if such a qgroup relation exists */
1229         list_for_each_entry(list, &member->groups, next_group) {
1230                 if (list->group == parent) {
1231                         ret = -EEXIST;
1232                         goto out;
1233                 }
1234         }
1235
1236         ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1237         if (ret)
1238                 goto out;
1239
1240         ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1241         if (ret) {
1242                 del_qgroup_relation_item(trans, quota_root, src, dst);
1243                 goto out;
1244         }
1245
1246         spin_lock(&fs_info->qgroup_lock);
1247         ret = add_relation_rb(fs_info, src, dst);
1248         if (ret < 0) {
1249                 spin_unlock(&fs_info->qgroup_lock);
1250                 goto out;
1251         }
1252         ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
1253         spin_unlock(&fs_info->qgroup_lock);
1254 out:
1255         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1256         ulist_free(tmp);
1257         return ret;
1258 }
1259
1260 static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
1261                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1262 {
1263         struct btrfs_root *quota_root;
1264         struct btrfs_qgroup *parent;
1265         struct btrfs_qgroup *member;
1266         struct btrfs_qgroup_list *list;
1267         struct ulist *tmp;
1268         int ret = 0;
1269         int err;
1270
1271         tmp = ulist_alloc(GFP_KERNEL);
1272         if (!tmp)
1273                 return -ENOMEM;
1274
1275         quota_root = fs_info->quota_root;
1276         if (!quota_root) {
1277                 ret = -EINVAL;
1278                 goto out;
1279         }
1280
1281         member = find_qgroup_rb(fs_info, src);
1282         parent = find_qgroup_rb(fs_info, dst);
1283         if (!member || !parent) {
1284                 ret = -EINVAL;
1285                 goto out;
1286         }
1287
1288         /* First check if such a qgroup relation exists */
1289         list_for_each_entry(list, &member->groups, next_group) {
1290                 if (list->group == parent)
1291                         goto exist;
1292         }
1293         ret = -ENOENT;
1294         goto out;
1295 exist:
1296         ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1297         err = del_qgroup_relation_item(trans, quota_root, dst, src);
1298         if (err && !ret)
1299                 ret = err;
1300
1301         spin_lock(&fs_info->qgroup_lock);
1302         del_relation_rb(fs_info, src, dst);
1303         ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
1304         spin_unlock(&fs_info->qgroup_lock);
1305 out:
1306         ulist_free(tmp);
1307         return ret;
1308 }
1309
1310 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1311                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1312 {
1313         int ret = 0;
1314
1315         mutex_lock(&fs_info->qgroup_ioctl_lock);
1316         ret = __del_qgroup_relation(trans, fs_info, src, dst);
1317         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1318
1319         return ret;
1320 }
1321
1322 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1323                         struct btrfs_fs_info *fs_info, u64 qgroupid)
1324 {
1325         struct btrfs_root *quota_root;
1326         struct btrfs_qgroup *qgroup;
1327         int ret = 0;
1328
1329         mutex_lock(&fs_info->qgroup_ioctl_lock);
1330         quota_root = fs_info->quota_root;
1331         if (!quota_root) {
1332                 ret = -EINVAL;
1333                 goto out;
1334         }
1335         qgroup = find_qgroup_rb(fs_info, qgroupid);
1336         if (qgroup) {
1337                 ret = -EEXIST;
1338                 goto out;
1339         }
1340
1341         ret = add_qgroup_item(trans, quota_root, qgroupid);
1342         if (ret)
1343                 goto out;
1344
1345         spin_lock(&fs_info->qgroup_lock);
1346         qgroup = add_qgroup_rb(fs_info, qgroupid);
1347         spin_unlock(&fs_info->qgroup_lock);
1348
1349         if (IS_ERR(qgroup))
1350                 ret = PTR_ERR(qgroup);
1351 out:
1352         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1353         return ret;
1354 }
1355
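/*
 * Remove a qgroup.  Fails with -EBUSY while other qgroups still list it as
 * their parent; its own relations to parent groups are dropped before the
 * qgroup itself is deleted.
 */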
1356 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1357                         struct btrfs_fs_info *fs_info, u64 qgroupid)
1358 {
1359         struct btrfs_root *quota_root;
1360         struct btrfs_qgroup *qgroup;
1361         struct btrfs_qgroup_list *list;
1362         int ret = 0;
1363
1364         mutex_lock(&fs_info->qgroup_ioctl_lock);
1365         quota_root = fs_info->quota_root;
1366         if (!quota_root) {
1367                 ret = -EINVAL;
1368                 goto out;
1369         }
1370
1371         qgroup = find_qgroup_rb(fs_info, qgroupid);
1372         if (!qgroup) {
1373                 ret = -ENOENT;
1374                 goto out;
1375         } else {
1376                 /* check if there are no children of this qgroup */
1377                 if (!list_empty(&qgroup->members)) {
1378                         ret = -EBUSY;
1379                         goto out;
1380                 }
1381         }
1382         ret = del_qgroup_item(trans, quota_root, qgroupid);
1383         if (ret && ret != -ENOENT)
1384                 goto out;
1385
1386         while (!list_empty(&qgroup->groups)) {
1387                 list = list_first_entry(&qgroup->groups,
1388                                         struct btrfs_qgroup_list, next_group);
1389                 ret = __del_qgroup_relation(trans, fs_info,
1390                                            qgroupid,
1391                                            list->group->qgroupid);
1392                 if (ret)
1393                         goto out;
1394         }
1395
1396         spin_lock(&fs_info->qgroup_lock);
1397         del_qgroup_rb(fs_info, qgroupid);
1398         spin_unlock(&fs_info->qgroup_lock);
1399 out:
1400         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1401         return ret;
1402 }
1403
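/*
 * Update the limits of a qgroup.  Only the fields selected in
 * limit->flags are applied, and a value of (u64)-1 clears that limit.
 */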
1404 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1405                        struct btrfs_fs_info *fs_info, u64 qgroupid,
1406                        struct btrfs_qgroup_limit *limit)
1407 {
1408         struct btrfs_root *quota_root;
1409         struct btrfs_qgroup *qgroup;
1410         int ret = 0;
1411         /* Sometimes we would want to clear the limit on this qgroup.
1412          * To meet this requirement, we treat -1 as a special value
1413          * which tells the kernel to clear the limit on this qgroup.
1414          */
1415         const u64 CLEAR_VALUE = -1;
1416
1417         mutex_lock(&fs_info->qgroup_ioctl_lock);
1418         quota_root = fs_info->quota_root;
1419         if (!quota_root) {
1420                 ret = -EINVAL;
1421                 goto out;
1422         }
1423
1424         qgroup = find_qgroup_rb(fs_info, qgroupid);
1425         if (!qgroup) {
1426                 ret = -ENOENT;
1427                 goto out;
1428         }
1429
1430         spin_lock(&fs_info->qgroup_lock);
1431         if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
1432                 if (limit->max_rfer == CLEAR_VALUE) {
1433                         qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
1434                         limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
1435                         qgroup->max_rfer = 0;
1436                 } else {
1437                         qgroup->max_rfer = limit->max_rfer;
1438                 }
1439         }
1440         if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
1441                 if (limit->max_excl == CLEAR_VALUE) {
1442                         qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
1443                         limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
1444                         qgroup->max_excl = 0;
1445                 } else {
1446                         qgroup->max_excl = limit->max_excl;
1447                 }
1448         }
1449         if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
1450                 if (limit->rsv_rfer == CLEAR_VALUE) {
1451                         qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
1452                         limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
1453                         qgroup->rsv_rfer = 0;
1454                 } else {
1455                         qgroup->rsv_rfer = limit->rsv_rfer;
1456                 }
1457         }
1458         if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
1459                 if (limit->rsv_excl == CLEAR_VALUE) {
1460                         qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
1461                         limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
1462                         qgroup->rsv_excl = 0;
1463                 } else {
1464                         qgroup->rsv_excl = limit->rsv_excl;
1465                 }
1466         }
1467         qgroup->lim_flags |= limit->flags;
1468
1469         spin_unlock(&fs_info->qgroup_lock);
1470
1471         ret = update_qgroup_limit_item(trans, quota_root, qgroup);
1472         if (ret) {
1473                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1474                 btrfs_info(fs_info, "unable to update quota limit for %llu",
1475                        qgroupid);
1476         }
1477
1478 out:
1479         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1480         return ret;
1481 }
1482
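/*
 * Insert @record into the transaction's dirty extent rbtree, keyed by
 * bytenr.  Returns 1 if a record for this bytenr already exists (the
 * caller must then free @record) and 0 once inserted.  The caller must
 * hold delayed_refs->lock.
 */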
1483 int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
1484                                 struct btrfs_delayed_ref_root *delayed_refs,
1485                                 struct btrfs_qgroup_extent_record *record)
1486 {
1487         struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
1488         struct rb_node *parent_node = NULL;
1489         struct btrfs_qgroup_extent_record *entry;
1490         u64 bytenr = record->bytenr;
1491
1492         assert_spin_locked(&delayed_refs->lock);
1493         trace_btrfs_qgroup_trace_extent(fs_info, record);
1494
1495         while (*p) {
1496                 parent_node = *p;
1497                 entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
1498                                  node);
1499                 if (bytenr < entry->bytenr)
1500                         p = &(*p)->rb_left;
1501                 else if (bytenr > entry->bytenr)
1502                         p = &(*p)->rb_right;
1503                 else
1504                         return 1;
1505         }
1506
1507         rb_link_node(&record->node, parent_node, p);
1508         rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
1509         return 0;
1510 }
1511
1512 int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
1513                                    struct btrfs_qgroup_extent_record *qrecord)
1514 {
1515         struct ulist *old_root;
1516         u64 bytenr = qrecord->bytenr;
1517         int ret;
1518
1519         ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
1520         if (ret < 0) {
1521                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1522                 btrfs_warn(fs_info,
1523 "error accounting new delayed refs extent (err code: %d), quota inconsistent",
1524                         ret);
1525                 return 0;
1526         }
1527
1528         /*
1529          * Here we don't need to get the lock of
1530          * trans->transaction->delayed_refs, since an inserted qrecord won't
1531          * be deleted; only qrecord->node may be modified (by a new insert).
1532          *
1533          * So modifying qrecord->old_roots is safe here
1534          */
1535         qrecord->old_roots = old_root;
1536         return 0;
1537 }
1538
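/*
 * Record a dirty extent for qgroup accounting: allocate a record, insert
 * it into the delayed refs tree, and if it was new, resolve the extent's
 * old roots immediately via btrfs_qgroup_trace_extent_post().
 */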
1539 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
1540                 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
1541                 gfp_t gfp_flag)
1542 {
1543         struct btrfs_qgroup_extent_record *record;
1544         struct btrfs_delayed_ref_root *delayed_refs;
1545         int ret;
1546
1547         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
1548             || bytenr == 0 || num_bytes == 0)
1549                 return 0;
1550         if (WARN_ON(trans == NULL))
1551                 return -EINVAL;
1552         record = kmalloc(sizeof(*record), gfp_flag);
1553         if (!record)
1554                 return -ENOMEM;
1555
1556         delayed_refs = &trans->transaction->delayed_refs;
1557         record->bytenr = bytenr;
1558         record->num_bytes = num_bytes;
1559         record->old_roots = NULL;
1560
1561         spin_lock(&delayed_refs->lock);
1562         ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
1563         spin_unlock(&delayed_refs->lock);
1564         if (ret > 0) {
1565                 kfree(record);
1566                 return 0;
1567         }
1568         return btrfs_qgroup_trace_extent_post(fs_info, record);
1569 }
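
/*
 * Illustrative sketch (annotation added here, not in the original source):
 * a minimal caller that dirties one extent for qgroup accounting, assuming
 * a valid transaction handle.  Duplicate bytenrs are handled internally:
 * the nolock insert above returns 1 and the record is freed.
 *
 *	static int example_dirty_extent(struct btrfs_trans_handle *trans,
 *					struct btrfs_fs_info *fs_info,
 *					u64 bytenr, u64 num_bytes)
 *	{
 *		return btrfs_qgroup_trace_extent(trans, fs_info, bytenr,
 *						 num_bytes, GFP_NOFS);
 *	}
 */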
1570
1571 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
1572                                   struct btrfs_fs_info *fs_info,
1573                                   struct extent_buffer *eb)
1574 {
1575         int nr = btrfs_header_nritems(eb);
1576         int i, extent_type, ret;
1577         struct btrfs_key key;
1578         struct btrfs_file_extent_item *fi;
1579         u64 bytenr, num_bytes;
1580
1581         /* We can be called directly from walk_up_proc() */
1582         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
1583                 return 0;
1584
1585         for (i = 0; i < nr; i++) {
1586                 btrfs_item_key_to_cpu(eb, &key, i);
1587
1588                 if (key.type != BTRFS_EXTENT_DATA_KEY)
1589                         continue;
1590
1591                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
1592                 /* filter out non qgroup-accountable extents */
1593                 extent_type = btrfs_file_extent_type(eb, fi);
1594
1595                 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
1596                         continue;
1597
1598                 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1599                 if (!bytenr)
1600                         continue;
1601
1602                 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1603
1604                 ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr,
1605                                                 num_bytes, GFP_NOFS);
1606                 if (ret)
1607                         return ret;
1608         }
1609         cond_resched();
1610         return 0;
1611 }
1612
1613 /*
1614  * Walk up the tree from the bottom, freeing leaves and any interior
1615  * nodes which have had all slots visited. If a node (leaf or
1616  * interior) is freed, the node above it will have its slot
1617  * incremented. The root node will never be freed.
1618  *
1619  * At the end of this function, we should have a path which has all
1620  * slots incremented to the next position for a search. If we need to
1621  * read a new node it will be NULL and the node above it will have the
1622  * correct slot selected for a later read.
1623  *
1624  * If we increment the root node's slot counter past the number of
1625  * elements, 1 is returned to signal completion of the search.
1626  */
1627 static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
1628 {
1629         int level = 0;
1630         int nr, slot;
1631         struct extent_buffer *eb;
1632
1633         if (root_level == 0)
1634                 return 1;
1635
1636         while (level <= root_level) {
1637                 eb = path->nodes[level];
1638                 nr = btrfs_header_nritems(eb);
1639                 path->slots[level]++;
1640                 slot = path->slots[level];
1641                 if (slot >= nr || level == 0) {
1642                         /*
1643                          * Don't free the root -  we will detect this
1644                          * condition after our loop and return a
1645                          * positive value for caller to stop walking the tree.
1646                          */
1647                         if (level != root_level) {
1648                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
1649                                 path->locks[level] = 0;
1650
1651                                 free_extent_buffer(eb);
1652                                 path->nodes[level] = NULL;
1653                                 path->slots[level] = 0;
1654                         }
1655                 } else {
1656                         /*
1657                          * We have a valid slot to walk back down
1658                          * from. Stop here so caller can process these
1659                          * new nodes.
1660                          */
1661                         break;
1662                 }
1663
1664                 level++;
1665         }
1666
1667         eb = path->nodes[root_level];
1668         if (path->slots[root_level] >= btrfs_header_nritems(eb))
1669                 return 1;
1670
1671         return 0;
1672 }
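
/*
 * Worked example (added annotation, assuming root_level == 2): the leaf at
 * level 0 is always unlocked and freed (note the "level == 0" in the
 * condition above), then slots[1] is incremented; if that slot is still
 * below nritems, the loop breaks and the caller re-reads the next child
 * through the now-NULL nodes[0].  Only when level 1 is exhausted as well
 * does slots[2] advance, and once it moves past the root's item count the
 * function returns 1, ending the walk in btrfs_qgroup_trace_subtree()
 * below.
 */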
1673
1674 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
1675                                struct btrfs_root *root,
1676                                struct extent_buffer *root_eb,
1677                                u64 root_gen, int root_level)
1678 {
1679         struct btrfs_fs_info *fs_info = root->fs_info;
1680         int ret = 0;
1681         int level;
1682         struct extent_buffer *eb = root_eb;
1683         struct btrfs_path *path = NULL;
1684
1685         BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL);
1686         BUG_ON(root_eb == NULL);
1687
1688         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
1689                 return 0;
1690
1691         if (!extent_buffer_uptodate(root_eb)) {
1692                 ret = btrfs_read_buffer(root_eb, root_gen);
1693                 if (ret)
1694                         goto out;
1695         }
1696
1697         if (root_level == 0) {
1698                 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb);
1699                 goto out;
1700         }
1701
1702         path = btrfs_alloc_path();
1703         if (!path)
1704                 return -ENOMEM;
1705
1706         /*
1707          * Walk down the tree.  Missing extent blocks are filled in as
1708          * we go. Metadata is accounted every time we read a new
1709          * extent block.
1710          *
1711          * When we reach a leaf, we account for file extent items in it,
1712          * walk back up the tree (adjusting slot pointers as we go)
1713          * and restart the search process.
1714          */
1715         extent_buffer_get(root_eb); /* For path */
1716         path->nodes[root_level] = root_eb;
1717         path->slots[root_level] = 0;
1718         path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
1719 walk_down:
1720         level = root_level;
1721         while (level >= 0) {
1722                 if (path->nodes[level] == NULL) {
1723                         int parent_slot;
1724                         u64 child_gen;
1725                         u64 child_bytenr;
1726
1727                         /*
1728                          * We need to get child blockptr/gen from parent before
1729                          * we can read it.
1730                           */
1731                         eb = path->nodes[level + 1];
1732                         parent_slot = path->slots[level + 1];
1733                         child_bytenr = btrfs_node_blockptr(eb, parent_slot);
1734                         child_gen = btrfs_node_ptr_generation(eb, parent_slot);
1735
1736                         eb = read_tree_block(fs_info, child_bytenr, child_gen);
1737                         if (IS_ERR(eb)) {
1738                                 ret = PTR_ERR(eb);
1739                                 goto out;
1740                         } else if (!extent_buffer_uptodate(eb)) {
1741                                 free_extent_buffer(eb);
1742                                 ret = -EIO;
1743                                 goto out;
1744                         }
1745
1746                         path->nodes[level] = eb;
1747                         path->slots[level] = 0;
1748
1749                         btrfs_tree_read_lock(eb);
1750                         btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1751                         path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
1752
1753                         ret = btrfs_qgroup_trace_extent(trans, fs_info,
1754                                                         child_bytenr,
1755                                                         fs_info->nodesize,
1756                                                         GFP_NOFS);
1757                         if (ret)
1758                                 goto out;
1759                 }
1760
1761                 if (level == 0) {
1762                         ret = btrfs_qgroup_trace_leaf_items(trans, fs_info,
1763                                                            path->nodes[level]);
1764                         if (ret)
1765                                 goto out;
1766
1767                         /* Nonzero return here means we completed our search */
1768                         ret = adjust_slots_upwards(path, root_level);
1769                         if (ret)
1770                                 break;
1771
1772                         /* Restart search with new slots */
1773                         goto walk_down;
1774                 }
1775
1776                 level--;
1777         }
1778
1779         ret = 0;
1780 out:
1781         btrfs_free_path(path);
1782
1783         return ret;
1784 }
1785
1786 #define UPDATE_NEW      0
1787 #define UPDATE_OLD      1
1788 /*
1789  * Walk all of the roots that point to the bytenr and adjust their refcnts.
1790  */
1791 static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
1792                                 struct ulist *roots, struct ulist *tmp,
1793                                 struct ulist *qgroups, u64 seq, int update_old)
1794 {
1795         struct ulist_node *unode;
1796         struct ulist_iterator uiter;
1797         struct ulist_node *tmp_unode;
1798         struct ulist_iterator tmp_uiter;
1799         struct btrfs_qgroup *qg;
1800         int ret = 0;
1801
1802         if (!roots)
1803                 return 0;
1804         ULIST_ITER_INIT(&uiter);
1805         while ((unode = ulist_next(roots, &uiter))) {
1806                 qg = find_qgroup_rb(fs_info, unode->val);
1807                 if (!qg)
1808                         continue;
1809
1810                 ulist_reinit(tmp);
1811                 ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
1812                                 GFP_ATOMIC);
1813                 if (ret < 0)
1814                         return ret;
1815                 ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
1816                 if (ret < 0)
1817                         return ret;
1818                 ULIST_ITER_INIT(&tmp_uiter);
1819                 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1820                         struct btrfs_qgroup_list *glist;
1821
1822                         qg = unode_aux_to_qgroup(tmp_unode);
1823                         if (update_old)
1824                                 btrfs_qgroup_update_old_refcnt(qg, seq, 1);
1825                         else
1826                                 btrfs_qgroup_update_new_refcnt(qg, seq, 1);
1827                         list_for_each_entry(glist, &qg->groups, next_group) {
1828                                 ret = ulist_add(qgroups, glist->group->qgroupid,
1829                                                 qgroup_to_aux(glist->group),
1830                                                 GFP_ATOMIC);
1831                                 if (ret < 0)
1832                                         return ret;
1833                                 ret = ulist_add(tmp, glist->group->qgroupid,
1834                                                 qgroup_to_aux(glist->group),
1835                                                 GFP_ATOMIC);
1836                                 if (ret < 0)
1837                                         return ret;
1838                         }
1839                 }
1840         }
1841         return 0;
1842 }
1843
1844 /*
1845  * Update qgroup rfer/excl counters.
1846  * The rfer update is easy, the code explains itself.
1847  *
1848  * The excl update is tricky, so the update is split into 2 parts.
1849  * Part 1: Possible exclusive <-> shared state detection:
1850  *      |       A       |       !A      |
1851  *  -------------------------------------
1852  *  B   |       *       |       -       |
1853  *  -------------------------------------
1854  *  !B  |       +       |       **      |
1855  *  -------------------------------------
1856  *
1857  * Conditions:
1858  * A:   cur_old_roots < nr_old_roots    (not exclusive before)
1859  * !A:  cur_old_roots == nr_old_roots   (possible exclusive before)
1860  * B:   cur_new_roots < nr_new_roots    (not exclusive now)
1861  * !B:  cur_new_roots == nr_new_roots   (possible exclusive now)
1862  *
1863  * Results:
1864  * +: Possible sharing -> exclusive     -: Possible exclusive -> sharing
1865  * *: Definitely not changed.           **: Possible unchanged.
1866  *
1867  * For !A and !B, the exception is the cur_old/new_roots == 0 case.
1868  *
1869  * To make the logic clear, we first use conditions A and B to split the
1870  * combinations into 4 results.
1871  *
1872  * Then, for results "+" and "-", check the old/new_roots == 0 case, as
1873  * there only one variant may be 0.
1874  *
1875  * Lastly, check result **; since both variants may be 0 there, split it
1876  * again (2x2).
1877  * This time no other factors need to be considered, so the code and
1878  * logic are easy to follow.
1879  */
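/*
 * Worked example (added annotation): a 16KiB extent was shared by roots A
 * and B, and B just dropped its reference, so nr_old_roots == 2 and
 * nr_new_roots == 1.
 * For A: cur_old_count == 1 < nr_old_roots and cur_new_count == 1 ==
 * nr_new_roots, the shared -> exclusive case, so A->excl grows by 16KiB
 * while A->rfer is unchanged.
 * For B: cur_old_count == 1 and cur_new_count == 0, so B->rfer shrinks by
 * 16KiB; B->excl is untouched since cur_old_count < nr_old_roots.
 */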
1880 static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
1881                                   struct ulist *qgroups,
1882                                   u64 nr_old_roots,
1883                                   u64 nr_new_roots,
1884                                   u64 num_bytes, u64 seq)
1885 {
1886         struct ulist_node *unode;
1887         struct ulist_iterator uiter;
1888         struct btrfs_qgroup *qg;
1889         u64 cur_new_count, cur_old_count;
1890
1891         ULIST_ITER_INIT(&uiter);
1892         while ((unode = ulist_next(qgroups, &uiter))) {
1893                 bool dirty = false;
1894
1895                 qg = unode_aux_to_qgroup(unode);
1896                 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
1897                 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
1898
1899                 trace_qgroup_update_counters(fs_info, qg->qgroupid,
1900                                              cur_old_count, cur_new_count);
1901
1902                 /* Rfer update part */
1903                 if (cur_old_count == 0 && cur_new_count > 0) {
1904                         qg->rfer += num_bytes;
1905                         qg->rfer_cmpr += num_bytes;
1906                         dirty = true;
1907                 }
1908                 if (cur_old_count > 0 && cur_new_count == 0) {
1909                         qg->rfer -= num_bytes;
1910                         qg->rfer_cmpr -= num_bytes;
1911                         dirty = true;
1912                 }
1913
1914                 /* Excl update part */
1915                 /* Exclusive/none -> shared case */
1916                 if (cur_old_count == nr_old_roots &&
1917                     cur_new_count < nr_new_roots) {
1918                         /* Exclusive -> shared */
1919                         if (cur_old_count != 0) {
1920                                 qg->excl -= num_bytes;
1921                                 qg->excl_cmpr -= num_bytes;
1922                                 dirty = true;
1923                         }
1924                 }
1925
1926                 /* Shared -> exclusive/none case */
1927                 if (cur_old_count < nr_old_roots &&
1928                     cur_new_count == nr_new_roots) {
1929                         /* Shared->exclusive */
1930                         if (cur_new_count != 0) {
1931                                 qg->excl += num_bytes;
1932                                 qg->excl_cmpr += num_bytes;
1933                                 dirty = true;
1934                         }
1935                 }
1936
1937                 /* Exclusive/none -> exclusive/none case */
1938                 if (cur_old_count == nr_old_roots &&
1939                     cur_new_count == nr_new_roots) {
1940                         if (cur_old_count == 0) {
1941                                 /* None -> exclusive/none */
1942
1943                                 if (cur_new_count != 0) {
1944                                         /* None -> exclusive */
1945                                         qg->excl += num_bytes;
1946                                         qg->excl_cmpr += num_bytes;
1947                                         dirty = true;
1948                                 }
1949                                 /* None -> none, nothing changed */
1950                         } else {
1951                                 /* Exclusive -> exclusive/none */
1952
1953                                 if (cur_new_count == 0) {
1954                                         /* Exclusive -> none */
1955                                         qg->excl -= num_bytes;
1956                                         qg->excl_cmpr -= num_bytes;
1957                                         dirty = true;
1958                                 }
1959                                 /* Exclusive -> exclusive, nothing changed */
1960                         }
1961                 }
1962
1963                 if (dirty)
1964                         qgroup_dirty(fs_info, qg);
1965         }
1966         return 0;
1967 }
1968
1969 /*
1970  * Check if @roots could be a list of fs tree roots
1971  *
1972  * Return 0 if it is definitely not a ulist of fs/subvol tree roots
1973  * Return 1 if there may be fs/subvol tree roots in the list (an empty
1974  *          list counts as well)
1975  */
1976 static int maybe_fs_roots(struct ulist *roots)
1977 {
1978         struct ulist_node *unode;
1979         struct ulist_iterator uiter;
1980
1981         /* Empty one, still possible for fs roots */
1982         if (!roots || roots->nnodes == 0)
1983                 return 1;
1984
1985         ULIST_ITER_INIT(&uiter);
1986         unode = ulist_next(roots, &uiter);
1987         if (!unode)
1988                 return 1;
1989
1990         /*
1991          * If it contains fs tree roots, then it must belong to fs/subvol
1992          * trees.
1993          * If it contains a non-fs tree, it won't be shared with fs/subvol trees.
1994          */
1995         return is_fstree(unode->val);
1996 }
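
/*
 * Example (added annotation): if the first node of @roots is root 7 (the
 * csum tree), is_fstree() returns 0 and the extent is skipped; a first
 * node such as 257 (a subvolume id >= BTRFS_FIRST_FREE_OBJECTID) keeps
 * the list as possible fs roots.
 */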
1997
1998 int
1999 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
2000                             struct btrfs_fs_info *fs_info,
2001                             u64 bytenr, u64 num_bytes,
2002                             struct ulist *old_roots, struct ulist *new_roots)
2003 {
2004         struct ulist *qgroups = NULL;
2005         struct ulist *tmp = NULL;
2006         u64 seq;
2007         u64 nr_new_roots = 0;
2008         u64 nr_old_roots = 0;
2009         int ret = 0;
2010
2011         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
2012                 return 0;
2013
2014         if (new_roots) {
2015                 if (!maybe_fs_roots(new_roots))
2016                         goto out_free;
2017                 nr_new_roots = new_roots->nnodes;
2018         }
2019         if (old_roots) {
2020                 if (!maybe_fs_roots(old_roots))
2021                         goto out_free;
2022                 nr_old_roots = old_roots->nnodes;
2023         }
2024
2025         /* Quick exit, either not fs tree roots, or won't affect any qgroup */
2026         if (nr_old_roots == 0 && nr_new_roots == 0)
2027                 goto out_free;
2028
2029         BUG_ON(!fs_info->quota_root);
2030
2031         trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes,
2032                                           nr_old_roots, nr_new_roots);
2033
2034         qgroups = ulist_alloc(GFP_NOFS);
2035         if (!qgroups) {
2036                 ret = -ENOMEM;
2037                 goto out_free;
2038         }
2039         tmp = ulist_alloc(GFP_NOFS);
2040         if (!tmp) {
2041                 ret = -ENOMEM;
2042                 goto out_free;
2043         }
2044
2045         mutex_lock(&fs_info->qgroup_rescan_lock);
2046         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
2047                 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
2048                         mutex_unlock(&fs_info->qgroup_rescan_lock);
2049                         ret = 0;
2050                         goto out_free;
2051                 }
2052         }
2053         mutex_unlock(&fs_info->qgroup_rescan_lock);
2054
2055         spin_lock(&fs_info->qgroup_lock);
2056         seq = fs_info->qgroup_seq;
2057
2058         /* Update old refcnts using old_roots */
2059         ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
2060                                    UPDATE_OLD);
2061         if (ret < 0)
2062                 goto out;
2063
2064         /* Update new refcnts using new_roots */
2065         ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
2066                                    UPDATE_NEW);
2067         if (ret < 0)
2068                 goto out;
2069
2070         qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
2071                                num_bytes, seq);
2072
2073         /*
2074          * Bump qgroup_seq to avoid seq overlap
2075          */
2076         fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
2077 out:
2078         spin_unlock(&fs_info->qgroup_lock);
2079 out_free:
2080         ulist_free(tmp);
2081         ulist_free(qgroups);
2082         ulist_free(old_roots);
2083         ulist_free(new_roots);
2084         return ret;
2085 }
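
/*
 * Usage note with sketch (added annotation): on every path past the
 * quota-enabled check, the function consumes @old_roots and @new_roots,
 * so a caller must not free them again:
 *
 *	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_roots,
 *				   false);
 *	if (ret < 0)
 *		goto out;
 *	ret = btrfs_qgroup_account_extent(trans, fs_info, bytenr, num_bytes,
 *					  old_roots, new_roots);
 *	// old_roots and new_roots now belong to the callee; see
 *	// btrfs_qgroup_account_extents() below for the real call site.
 */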
2086
2087 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
2088 {
2089         struct btrfs_fs_info *fs_info = trans->fs_info;
2090         struct btrfs_qgroup_extent_record *record;
2091         struct btrfs_delayed_ref_root *delayed_refs;
2092         struct ulist *new_roots = NULL;
2093         struct rb_node *node;
2094         u64 qgroup_to_skip;
2095         int ret = 0;
2096
2097         delayed_refs = &trans->transaction->delayed_refs;
2098         qgroup_to_skip = delayed_refs->qgroup_to_skip;
2099         while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
2100                 record = rb_entry(node, struct btrfs_qgroup_extent_record,
2101                                   node);
2102
2103                 trace_btrfs_qgroup_account_extents(fs_info, record);
2104
2105                 if (!ret) {
2106                         /*
2107                          * Old roots should have been searched when inserting
2108                          * the qgroup extent record
2109                          */
2110                         if (WARN_ON(!record->old_roots)) {
2111                                 /* Search commit root to find old_roots */
2112                                 ret = btrfs_find_all_roots(NULL, fs_info,
2113                                                 record->bytenr, 0,
2114                                                 &record->old_roots, false);
2115                                 if (ret < 0)
2116                                         goto cleanup;
2117                         }
2118
2119                         /*
2120                          * Use SEQ_LAST as time_seq to do a special search which
2121                          * doesn't lock the tree or delayed_refs and searches the
2122                          * current root. It's safe inside commit_transaction().
2123                          */
2124                         ret = btrfs_find_all_roots(trans, fs_info,
2125                                 record->bytenr, SEQ_LAST, &new_roots, false);
2126                         if (ret < 0)
2127                                 goto cleanup;
2128                         if (qgroup_to_skip) {
2129                                 ulist_del(new_roots, qgroup_to_skip, 0);
2130                                 ulist_del(record->old_roots, qgroup_to_skip,
2131                                           0);
2132                         }
2133                         ret = btrfs_qgroup_account_extent(trans, fs_info,
2134                                         record->bytenr, record->num_bytes,
2135                                         record->old_roots, new_roots);
2136                         record->old_roots = NULL;
2137                         new_roots = NULL;
2138                 }
2139 cleanup:
2140                 ulist_free(record->old_roots);
2141                 ulist_free(new_roots);
2142                 new_roots = NULL;
2143                 rb_erase(node, &delayed_refs->dirty_extent_root);
2144                 kfree(record);
2145
2146         }
2147         return ret;
2148 }
2149
2150 /*
2151  * called from commit_transaction. Writes all changed qgroups to disk.
2152  */
2153 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2154                       struct btrfs_fs_info *fs_info)
2155 {
2156         struct btrfs_root *quota_root = fs_info->quota_root;
2157         int ret = 0;
2158
2159         if (!quota_root)
2160                 return ret;
2161
2162         spin_lock(&fs_info->qgroup_lock);
2163         while (!list_empty(&fs_info->dirty_qgroups)) {
2164                 struct btrfs_qgroup *qgroup;
2165                 qgroup = list_first_entry(&fs_info->dirty_qgroups,
2166                                           struct btrfs_qgroup, dirty);
2167                 list_del_init(&qgroup->dirty);
2168                 spin_unlock(&fs_info->qgroup_lock);
2169                 ret = update_qgroup_info_item(trans, quota_root, qgroup);
2170                 if (ret)
2171                         fs_info->qgroup_flags |=
2172                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2173                 ret = update_qgroup_limit_item(trans, quota_root, qgroup);
2174                 if (ret)
2175                         fs_info->qgroup_flags |=
2176                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2177                 spin_lock(&fs_info->qgroup_lock);
2178         }
2179         if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
2180                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
2181         else
2182                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
2183         spin_unlock(&fs_info->qgroup_lock);
2184
2185         ret = update_qgroup_status_item(trans, fs_info, quota_root);
2186         if (ret)
2187                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2188
2189         return ret;
2190 }
2191
2192 /*
2193  * Copy the accounting information between qgroups. This is necessary
2194  * when a snapshot or a subvolume is created. Throwing an error will
2195  * cause a transaction abort so we take extra care here to only error
2196  * when a readonly fs is a reasonable outcome.
2197  */
2198 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2199                          struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
2200                          struct btrfs_qgroup_inherit *inherit)
2201 {
2202         int ret = 0;
2203         int i;
2204         u64 *i_qgroups;
2205         struct btrfs_root *quota_root = fs_info->quota_root;
2206         struct btrfs_qgroup *srcgroup;
2207         struct btrfs_qgroup *dstgroup;
2208         u32 level_size = 0;
2209         u64 nums;
2210
2211         mutex_lock(&fs_info->qgroup_ioctl_lock);
2212         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
2213                 goto out;
2214
2215         if (!quota_root) {
2216                 ret = -EINVAL;
2217                 goto out;
2218         }
2219
2220         if (inherit) {
2221                 i_qgroups = (u64 *)(inherit + 1);
2222                 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2223                        2 * inherit->num_excl_copies;
2224                 for (i = 0; i < nums; ++i) {
2225                         srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
2226
2227                         /*
2228                          * Zero out invalid groups so we can ignore
2229                          * them later.
2230                          */
2231                         if (!srcgroup ||
2232                             ((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
2233                                 *i_qgroups = 0ULL;
2234
2235                         ++i_qgroups;
2236                 }
2237         }
2238
2239         /*
2240          * create a tracking group for the subvol itself
2241          */
2242         ret = add_qgroup_item(trans, quota_root, objectid);
2243         if (ret)
2244                 goto out;
2245
2246         if (srcid) {
2247                 struct btrfs_root *srcroot;
2248                 struct btrfs_key srckey;
2249
2250                 srckey.objectid = srcid;
2251                 srckey.type = BTRFS_ROOT_ITEM_KEY;
2252                 srckey.offset = (u64)-1;
2253                 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
2254                 if (IS_ERR(srcroot)) {
2255                         ret = PTR_ERR(srcroot);
2256                         goto out;
2257                 }
2258
2259                 level_size = fs_info->nodesize;
2260         }
2261
2262         /*
2263          * add qgroup to all inherited groups
2264          */
2265         if (inherit) {
2266                 i_qgroups = (u64 *)(inherit + 1);
2267                 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
2268                         if (*i_qgroups == 0)
2269                                 continue;
2270                         ret = add_qgroup_relation_item(trans, quota_root,
2271                                                        objectid, *i_qgroups);
2272                         if (ret && ret != -EEXIST)
2273                                 goto out;
2274                         ret = add_qgroup_relation_item(trans, quota_root,
2275                                                        *i_qgroups, objectid);
2276                         if (ret && ret != -EEXIST)
2277                                 goto out;
2278                 }
2279                 ret = 0;
2280         }
2281
2282
2283         spin_lock(&fs_info->qgroup_lock);
2284
2285         dstgroup = add_qgroup_rb(fs_info, objectid);
2286         if (IS_ERR(dstgroup)) {
2287                 ret = PTR_ERR(dstgroup);
2288                 goto unlock;
2289         }
2290
2291         if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2292                 dstgroup->lim_flags = inherit->lim.flags;
2293                 dstgroup->max_rfer = inherit->lim.max_rfer;
2294                 dstgroup->max_excl = inherit->lim.max_excl;
2295                 dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
2296                 dstgroup->rsv_excl = inherit->lim.rsv_excl;
2297
2298                 ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
2299                 if (ret) {
2300                         fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2301                         btrfs_info(fs_info,
2302                                    "unable to update quota limit for %llu",
2303                                    dstgroup->qgroupid);
2304                         goto unlock;
2305                 }
2306         }
2307
2308         if (srcid) {
2309                 srcgroup = find_qgroup_rb(fs_info, srcid);
2310                 if (!srcgroup)
2311                         goto unlock;
2312
2313                 /*
2314                  * We call inherit after we clone the root in order to make sure
2315                  * our counts don't go crazy, so at this point the only
2316                  * difference between the two roots should be the root node.
2317                  */
2318                 dstgroup->rfer = srcgroup->rfer;
2319                 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2320                 dstgroup->excl = level_size;
2321                 dstgroup->excl_cmpr = level_size;
2322                 srcgroup->excl = level_size;
2323                 srcgroup->excl_cmpr = level_size;
2324
2325                 /* inherit the limit info */
2326                 dstgroup->lim_flags = srcgroup->lim_flags;
2327                 dstgroup->max_rfer = srcgroup->max_rfer;
2328                 dstgroup->max_excl = srcgroup->max_excl;
2329                 dstgroup->rsv_rfer = srcgroup->rsv_rfer;
2330                 dstgroup->rsv_excl = srcgroup->rsv_excl;
2331
2332                 qgroup_dirty(fs_info, dstgroup);
2333                 qgroup_dirty(fs_info, srcgroup);
2334         }
2335
2336         if (!inherit)
2337                 goto unlock;
2338
2339         i_qgroups = (u64 *)(inherit + 1);
2340         for (i = 0; i < inherit->num_qgroups; ++i) {
2341                 if (*i_qgroups) {
2342                         ret = add_relation_rb(fs_info, objectid, *i_qgroups);
2343                         if (ret)
2344                                 goto unlock;
2345                 }
2346                 ++i_qgroups;
2347         }
2348
2349         for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
2350                 struct btrfs_qgroup *src;
2351                 struct btrfs_qgroup *dst;
2352
2353                 if (!i_qgroups[0] || !i_qgroups[1])
2354                         continue;
2355
2356                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2357                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2358
2359                 if (!src || !dst) {
2360                         ret = -EINVAL;
2361                         goto unlock;
2362                 }
2363
2364                 dst->rfer = src->rfer - level_size;
2365                 dst->rfer_cmpr = src->rfer_cmpr - level_size;
2366         }
2367         for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
2368                 struct btrfs_qgroup *src;
2369                 struct btrfs_qgroup *dst;
2370
2371                 if (!i_qgroups[0] || !i_qgroups[1])
2372                         continue;
2373
2374                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2375                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2376
2377                 if (!src || !dst) {
2378                         ret = -EINVAL;
2379                         goto unlock;
2380                 }
2381
2382                 dst->excl = src->excl + level_size;
2383                 dst->excl_cmpr = src->excl_cmpr + level_size;
2384         }
2385
2386 unlock:
2387         spin_unlock(&fs_info->qgroup_lock);
2388 out:
2389         mutex_unlock(&fs_info->qgroup_ioctl_lock);
2390         return ret;
2391 }
2392
2393 static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
2394 {
2395         if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
2396             qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
2397                 return false;
2398
2399         if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
2400             qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
2401                 return false;
2402
2403         return true;
2404 }
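
/*
 * Numeric example (added annotation): with max_rfer == 1MiB, rfer ==
 * 900KiB and 100KiB already reserved, an extra 64KiB reservation fails
 * the check since 100KiB + 900KiB + 64KiB > 1MiB; qgroup_reserve() below
 * then returns -EDQUOT once its commit-and-retry path is exhausted.
 */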
2405
2406 static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
2407                           enum btrfs_qgroup_rsv_type type)
2408 {
2409         struct btrfs_root *quota_root;
2410         struct btrfs_qgroup *qgroup;
2411         struct btrfs_fs_info *fs_info = root->fs_info;
2412         u64 ref_root = root->root_key.objectid;
2413         int ret = 0;
2414         int retried = 0;
2415         struct ulist_node *unode;
2416         struct ulist_iterator uiter;
2417
2418         if (!is_fstree(ref_root))
2419                 return 0;
2420
2421         if (num_bytes == 0)
2422                 return 0;
2423
2424         if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
2425             capable(CAP_SYS_RESOURCE))
2426                 enforce = false;
2427
2428 retry:
2429         spin_lock(&fs_info->qgroup_lock);
2430         quota_root = fs_info->quota_root;
2431         if (!quota_root)
2432                 goto out;
2433
2434         qgroup = find_qgroup_rb(fs_info, ref_root);
2435         if (!qgroup)
2436                 goto out;
2437
2438         /*
2439          * in a first step, we check all affected qgroups to see whether any
2440          * limits would be exceeded
2441          */
2442         ulist_reinit(fs_info->qgroup_ulist);
2443         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2444                         (uintptr_t)qgroup, GFP_ATOMIC);
2445         if (ret < 0)
2446                 goto out;
2447         ULIST_ITER_INIT(&uiter);
2448         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2449                 struct btrfs_qgroup *qg;
2450                 struct btrfs_qgroup_list *glist;
2451
2452                 qg = unode_aux_to_qgroup(unode);
2453
2454                 if (enforce && !qgroup_check_limits(qg, num_bytes)) {
2455                         /*
2456                          * Commit the tree and retry, since we may have
2457                          * deletions which would free up space.
2458                          */
2459                         if (!retried && qgroup_rsv_total(qg) > 0) {
2460                                 struct btrfs_trans_handle *trans;
2461
2462                                 spin_unlock(&fs_info->qgroup_lock);
2463                                 ret = btrfs_start_delalloc_inodes(root, 0);
2464                                 if (ret)
2465                                         return ret;
2466                                 btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
2467                                 trans = btrfs_join_transaction(root);
2468                                 if (IS_ERR(trans))
2469                                         return PTR_ERR(trans);
2470                                 ret = btrfs_commit_transaction(trans);
2471                                 if (ret)
2472                                         return ret;
2473                                 retried++;
2474                                 goto retry;
2475                         }
2476                         ret = -EDQUOT;
2477                         goto out;
2478                 }
2479
2480                 list_for_each_entry(glist, &qg->groups, next_group) {
2481                         ret = ulist_add(fs_info->qgroup_ulist,
2482                                         glist->group->qgroupid,
2483                                         (uintptr_t)glist->group, GFP_ATOMIC);
2484                         if (ret < 0)
2485                                 goto out;
2486                 }
2487         }
2488         ret = 0;
2489         /*
2490          * no limits exceeded, now record the reservation into all qgroups
2491          */
2492         ULIST_ITER_INIT(&uiter);
2493         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2494                 struct btrfs_qgroup *qg;
2495
2496                 qg = unode_aux_to_qgroup(unode);
2497
2498                 trace_qgroup_update_reserve(fs_info, qg, num_bytes);
2499                 qgroup_rsv_add(qg, num_bytes, type);
2500         }
2501
2502 out:
2503         spin_unlock(&fs_info->qgroup_lock);
2504         return ret;
2505 }
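
/*
 * Propagation example (added annotation): if qgroup 0/257 is a member of
 * 1/100, the ulist walk above visits both, so a successful reservation
 * adds @num_bytes to the rsv of 0/257 and of 1/100, and an exceeded
 * limit on either of them fails the whole reservation.
 */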
2506
2507 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
2508                                u64 ref_root, u64 num_bytes,
2509                                enum btrfs_qgroup_rsv_type type)
2510 {
2511         struct btrfs_root *quota_root;
2512         struct btrfs_qgroup *qgroup;
2513         struct ulist_node *unode;
2514         struct ulist_iterator uiter;
2515         int ret = 0;
2516
2517         if (!is_fstree(ref_root))
2518                 return;
2519
2520         if (num_bytes == 0)
2521                 return;
2522
2523         spin_lock(&fs_info->qgroup_lock);
2524
2525         quota_root = fs_info->quota_root;
2526         if (!quota_root)
2527                 goto out;
2528
2529         qgroup = find_qgroup_rb(fs_info, ref_root);
2530         if (!qgroup)
2531                 goto out;
2532
2533         ulist_reinit(fs_info->qgroup_ulist);
2534         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2535                         (uintptr_t)qgroup, GFP_ATOMIC);
2536         if (ret < 0)
2537                 goto out;
2538         ULIST_ITER_INIT(&uiter);
2539         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2540                 struct btrfs_qgroup *qg;
2541                 struct btrfs_qgroup_list *glist;
2542
2543                 qg = unode_aux_to_qgroup(unode);
2544
2545                 trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
2546                 qgroup_rsv_release(qg, num_bytes, type);
2547
2548                 list_for_each_entry(glist, &qg->groups, next_group) {
2549                         ret = ulist_add(fs_info->qgroup_ulist,
2550                                         glist->group->qgroupid,
2551                                         (uintptr_t)glist->group, GFP_ATOMIC);
2552                         if (ret < 0)
2553                                 goto out;
2554                 }
2555         }
2556
2557 out:
2558         spin_unlock(&fs_info->qgroup_lock);
2559 }
2560
2561 /*
2562  * returns < 0 on error, 0 when more leaves are to be scanned.
2563  * returns 1 when done.
2564  */
2565 static int
2566 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2567                    struct btrfs_trans_handle *trans)
2568 {
2569         struct btrfs_key found;
2570         struct extent_buffer *scratch_leaf = NULL;
2571         struct ulist *roots = NULL;
2572         struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
2573         u64 num_bytes;
2574         int slot;
2575         int ret;
2576
2577         mutex_lock(&fs_info->qgroup_rescan_lock);
2578         ret = btrfs_search_slot_for_read(fs_info->extent_root,
2579                                          &fs_info->qgroup_rescan_progress,
2580                                          path, 1, 0);
2581
2582         btrfs_debug(fs_info,
2583                 "current progress key (%llu %u %llu), search_slot ret %d",
2584                 fs_info->qgroup_rescan_progress.objectid,
2585                 fs_info->qgroup_rescan_progress.type,
2586                 fs_info->qgroup_rescan_progress.offset, ret);
2587
2588         if (ret) {
2589                 /*
2590                  * The rescan is about to end, we will not be scanning any
2591                  * further blocks. We cannot unset the RESCAN flag here, because
2592                  * we want to commit the transaction if everything went well.
2593                  * To make the live accounting work in this phase, we set our
2594                  * scan progress pointer such that every real extent objectid
2595                  * will be smaller.
2596                  */
2597                 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
2598                 btrfs_release_path(path);
2599                 mutex_unlock(&fs_info->qgroup_rescan_lock);
2600                 return ret;
2601         }
2602
2603         btrfs_item_key_to_cpu(path->nodes[0], &found,
2604                               btrfs_header_nritems(path->nodes[0]) - 1);
2605         fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
2606
2607         btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2608         scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
2609         if (!scratch_leaf) {
2610                 ret = -ENOMEM;
2611                 mutex_unlock(&fs_info->qgroup_rescan_lock);
2612                 goto out;
2613         }
2614         extent_buffer_get(scratch_leaf);
2615         btrfs_tree_read_lock(scratch_leaf);
2616         btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
2617         slot = path->slots[0];
2618         btrfs_release_path(path);
2619         mutex_unlock(&fs_info->qgroup_rescan_lock);
2620
2621         for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
2622                 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
2623                 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
2624                     found.type != BTRFS_METADATA_ITEM_KEY)
2625                         continue;
2626                 if (found.type == BTRFS_METADATA_ITEM_KEY)
2627                         num_bytes = fs_info->nodesize;
2628                 else
2629                         num_bytes = found.offset;
2630
2631                 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
2632                                            &roots, false);
2633                 if (ret < 0)
2634                         goto out;
2635                 /* For rescan, just pass old_roots as NULL */
2636                 ret = btrfs_qgroup_account_extent(trans, fs_info,
2637                                 found.objectid, num_bytes, NULL, roots);
2638                 if (ret < 0)
2639                         goto out;
2640         }
2641 out:
2642         if (scratch_leaf) {
2643                 btrfs_tree_read_unlock_blocking(scratch_leaf);
2644                 free_extent_buffer(scratch_leaf);
2645         }
2646         btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2647
2648         return ret;
2649 }
2650
2651 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2652 {
2653         struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
2654                                                      qgroup_rescan_work);
2655         struct btrfs_path *path;
2656         struct btrfs_trans_handle *trans = NULL;
2657         int err = -ENOMEM;
2658         int ret = 0;
2659
2660         path = btrfs_alloc_path();
2661         if (!path)
2662                 goto out;
2663
2664         err = 0;
2665         while (!err && !btrfs_fs_closing(fs_info)) {
2666                 trans = btrfs_start_transaction(fs_info->fs_root, 0);
2667                 if (IS_ERR(trans)) {
2668                         err = PTR_ERR(trans);
2669                         break;
2670                 }
2671                 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
2672                         err = -EINTR;
2673                 } else {
2674                         err = qgroup_rescan_leaf(fs_info, path, trans);
2675                 }
2676                 if (err > 0)
2677                         btrfs_commit_transaction(trans);
2678                 else
2679                         btrfs_end_transaction(trans);
2680         }
2681
2682 out:
2683         btrfs_free_path(path);
2684
2685         mutex_lock(&fs_info->qgroup_rescan_lock);
2686         if (!btrfs_fs_closing(fs_info))
2687                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2688
2689         if (err > 0 &&
2690             fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2691                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2692         } else if (err < 0) {
2693                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2694         }
2695         mutex_unlock(&fs_info->qgroup_rescan_lock);
2696
2697         /*
2698          * only update status, since the previous part has already updated the
2699          * qgroup info.
2700          */
2701         trans = btrfs_start_transaction(fs_info->quota_root, 1);
2702         if (IS_ERR(trans)) {
2703                 err = PTR_ERR(trans);
2704                 btrfs_err(fs_info,
2705                           "fail to start transaction for status update: %d",
2706                           err);
2707                 goto done;
2708         }
2709         ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
2710         if (ret < 0) {
2711                 err = ret;
2712                 btrfs_err(fs_info, "fail to update qgroup status: %d", err);
2713         }
2714         btrfs_end_transaction(trans);
2715
2716         if (btrfs_fs_closing(fs_info)) {
2717                 btrfs_info(fs_info, "qgroup scan paused");
2718         } else if (err >= 0) {
2719                 btrfs_info(fs_info, "qgroup scan completed%s",
2720                         err > 0 ? " (inconsistency flag cleared)" : "");
2721         } else {
2722                 btrfs_err(fs_info, "qgroup scan failed with %d", err);
2723         }
2724
2725 done:
2726         mutex_lock(&fs_info->qgroup_rescan_lock);
2727         fs_info->qgroup_rescan_running = false;
2728         mutex_unlock(&fs_info->qgroup_rescan_lock);
2729         complete_all(&fs_info->qgroup_rescan_completion);
2730 }
2731
2732 /*
2733  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2734  * memory required for the rescan context.
2735  */
2736 static int
2737 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2738                    int init_flags)
2739 {
2740         int ret = 0;
2741
2742         if (!init_flags &&
2743             (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2744              !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2745                 ret = -EINVAL;
2746                 goto err;
2747         }
2748
2749         mutex_lock(&fs_info->qgroup_rescan_lock);
2750         spin_lock(&fs_info->qgroup_lock);
2751
2752         if (init_flags) {
2753                 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2754                         ret = -EINPROGRESS;
2755                 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2756                         ret = -EINVAL;
2757
2758                 if (ret) {
2759                         spin_unlock(&fs_info->qgroup_lock);
2760                         mutex_unlock(&fs_info->qgroup_rescan_lock);
2761                         goto err;
2762                 }
2763                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2764         }
2765
2766         memset(&fs_info->qgroup_rescan_progress, 0,
2767                 sizeof(fs_info->qgroup_rescan_progress));
2768         fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2769         init_completion(&fs_info->qgroup_rescan_completion);
2770         fs_info->qgroup_rescan_running = true;
2771
2772         spin_unlock(&fs_info->qgroup_lock);
2773         mutex_unlock(&fs_info->qgroup_rescan_lock);
2774
2775         memset(&fs_info->qgroup_rescan_work, 0,
2776                sizeof(fs_info->qgroup_rescan_work));
2777         btrfs_init_work(&fs_info->qgroup_rescan_work,
2778                         btrfs_qgroup_rescan_helper,
2779                         btrfs_qgroup_rescan_worker, NULL, NULL);
2780
2781         if (ret) {
2782 err:
2783                 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2784                 return ret;
2785         }
2786
2787         return 0;
2788 }
2789
2790 static void
2791 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2792 {
2793         struct rb_node *n;
2794         struct btrfs_qgroup *qgroup;
2795
2796         spin_lock(&fs_info->qgroup_lock);
2797         /* clear all current qgroup tracking information */
2798         for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2799                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
2800                 qgroup->rfer = 0;
2801                 qgroup->rfer_cmpr = 0;
2802                 qgroup->excl = 0;
2803                 qgroup->excl_cmpr = 0;
2804         }
2805         spin_unlock(&fs_info->qgroup_lock);
2806 }
2807
2808 int
2809 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2810 {
2811         int ret = 0;
2812         struct btrfs_trans_handle *trans;
2813
2814         ret = qgroup_rescan_init(fs_info, 0, 1);
2815         if (ret)
2816                 return ret;
2817
2818         /*
2819          * We have set the rescan_progress to 0, which means no more
2820          * delayed refs will be accounted by btrfs_qgroup_account_ref.
2821          * However, btrfs_qgroup_account_ref may be running right after its call
2822          * to btrfs_find_all_roots, in which case it would still do the
2823          * accounting.
2824          * To solve this, we're committing the transaction, which will
2825          * ensure we run all delayed refs and only after that, we are
2826          * going to clear all tracking information for a clean start.
2827          */
2828
2829         trans = btrfs_join_transaction(fs_info->fs_root);
2830         if (IS_ERR(trans)) {
2831                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2832                 return PTR_ERR(trans);
2833         }
2834         ret = btrfs_commit_transaction(trans);
2835         if (ret) {
2836                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2837                 return ret;
2838         }
2839
2840         qgroup_rescan_zero_tracking(fs_info);
2841
2842         btrfs_queue_work(fs_info->qgroup_rescan_workers,
2843                          &fs_info->qgroup_rescan_work);
2844
2845         return 0;
2846 }
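
/*
 * Note (added annotation): this is normally reached via the quota rescan
 * ioctl ("btrfs quota rescan <mnt>" in btrfs-progs); the work item queued
 * here then runs btrfs_qgroup_rescan_worker() above.
 */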
2847
2848 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
2849                                      bool interruptible)
2850 {
2851         int running;
2852         int ret = 0;
2853
2854         mutex_lock(&fs_info->qgroup_rescan_lock);
2855         spin_lock(&fs_info->qgroup_lock);
2856         running = fs_info->qgroup_rescan_running;
2857         spin_unlock(&fs_info->qgroup_lock);
2858         mutex_unlock(&fs_info->qgroup_rescan_lock);
2859
2860         if (!running)
2861                 return 0;
2862
2863         if (interruptible)
2864                 ret = wait_for_completion_interruptible(
2865                                         &fs_info->qgroup_rescan_completion);
2866         else
2867                 wait_for_completion(&fs_info->qgroup_rescan_completion);
2868
2869         return ret;
2870 }
2871
2872 /*
2873  * this is only called from open_ctree where we're still single threaded, thus
2874  * locking is omitted here.
2875  */
2876 void
2877 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2878 {
2879         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2880                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
2881                                  &fs_info->qgroup_rescan_work);
2882 }
2883
2884 /*
2885  * Reserve qgroup space for range [start, start + len).
2886  *
2887  * This function will either reserve space from related qgroups or do
2888  * nothing if the range is already reserved.
2889  *
2890  * Return 0 for a successful reservation
2891  * Return <0 for error (including -EDQUOT)
2892  *
2893  * NOTE: this function may sleep for memory allocation.
2894  *       If btrfs_qgroup_reserve_data() is called multiple times with the
2895  *       same @reserved, the caller must ensure that when an error happens
2896  *       it is OK to free *ALL* reserved space.
2897  */
2898 int btrfs_qgroup_reserve_data(struct inode *inode,
2899                         struct extent_changeset **reserved_ret, u64 start,
2900                         u64 len)
2901 {
2902         struct btrfs_root *root = BTRFS_I(inode)->root;
2903         struct ulist_node *unode;
2904         struct ulist_iterator uiter;
2905         struct extent_changeset *reserved;
2906         u64 orig_reserved;
2907         u64 to_reserve;
2908         int ret;
2909
2910         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
2911             !is_fstree(root->objectid) || len == 0)
2912                 return 0;
2913
2914         /* @reserved parameter is mandatory for qgroup */
2915         if (WARN_ON(!reserved_ret))
2916                 return -EINVAL;
2917         if (!*reserved_ret) {
2918                 *reserved_ret = extent_changeset_alloc();
2919                 if (!*reserved_ret)
2920                         return -ENOMEM;
2921         }
2922         reserved = *reserved_ret;
2923         /* Record already reserved space */
2924         orig_reserved = reserved->bytes_changed;
2925         ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
2926                         start + len - 1, EXTENT_QGROUP_RESERVED, reserved);
2927
2928         /* Newly reserved space */
2929         to_reserve = reserved->bytes_changed - orig_reserved;
2930         trace_btrfs_qgroup_reserve_data(inode, start, len,
2931                                         to_reserve, QGROUP_RESERVE);
2932         if (ret < 0)
2933                 goto cleanup;
2934         ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
2935         if (ret < 0)
2936                 goto cleanup;
2937
2938         return ret;
2939
2940 cleanup:
2941         /* Clean up *ALL* already reserved ranges */
2942         ULIST_ITER_INIT(&uiter);
2943         while ((unode = ulist_next(&reserved->range_changed, &uiter)))
2944                 clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
2945                                  unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
2946         extent_changeset_release(reserved);
2947         return ret;
2948 }
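
To make the *ALL*-or-nothing rule from the comment above concrete, here is a hypothetical caller (my_prepare_write() is illustrative, not an existing helper) that reserves two ranges through the same changeset and frees everything recorded in it on failure:

/*
 * Hypothetical caller of btrfs_qgroup_reserve_data(): both calls share
 * one changeset, so the error path frees *ALL* recorded ranges, not
 * just the one that failed.
 */
static int my_prepare_write(struct inode *inode,
			    struct extent_changeset **reserved,
			    u64 start, u64 len)
{
	int ret;

	/* First call allocates *@reserved; later calls reuse it. */
	ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
	if (ret < 0)
		goto cleanup;
	/* A second range accumulates into the same changeset. */
	ret = btrfs_qgroup_reserve_data(inode, reserved, start + len, len);
	if (ret < 0)
		goto cleanup;
	return 0;

cleanup:
	/* Free every range recorded in *@reserved within the full span. */
	if (*reserved)
		btrfs_qgroup_free_data(inode, *reserved, start, 2 * len);
	return ret;
}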
2949
2950 /* Free ranges specified by @reserved, normally in the error path */
2951 static int qgroup_free_reserved_data(struct inode *inode,
2952                         struct extent_changeset *reserved, u64 start, u64 len)
2953 {
2954         struct btrfs_root *root = BTRFS_I(inode)->root;
2955         struct ulist_node *unode;
2956         struct ulist_iterator uiter;
2957         struct extent_changeset changeset;
2958         int freed = 0;
2959         int ret;
2960
2961         extent_changeset_init(&changeset);
2962         len = round_up(start + len, root->fs_info->sectorsize) - round_down(start, root->fs_info->sectorsize);
2963         start = round_down(start, root->fs_info->sectorsize);
2964
2965         ULIST_ITER_INIT(&uiter);
2966         while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
2967                 u64 range_start = unode->val;
2968                 /* unode->aux is the inclusive end */
2969                 u64 range_len = unode->aux - range_start + 1;
2970                 u64 free_start;
2971                 u64 free_len;
2972
2973                 extent_changeset_release(&changeset);
2974
2975                 /* Only free the part of the range within [start, start + len) */
2976                 if (range_start >= start + len ||
2977                     range_start + range_len <= start)
2978                         continue;
2979                 free_start = max(range_start, start);
2980                 free_len = min(start + len, range_start + range_len) -
2981                            free_start;
2982                 /*
2983                  * TODO: Also modify reserved->range_changed to reflect
2984                  * the modification.
2985                  *
2986                  * However, as long as we free qgroup reserved space
2987                  * according to EXTENT_QGROUP_RESERVED, we won't double
2988                  * free, so there is no need to rush.
2989                  */
2990                 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
2991                                 free_start, free_start + free_len - 1,
2992                                 EXTENT_QGROUP_RESERVED, &changeset);
2993                 if (ret < 0)
2994                         goto out;
2995                 freed += changeset.bytes_changed;
2996         }
2997         btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed,
2998                                   BTRFS_QGROUP_RSV_DATA);
2999         ret = freed;
3000 out:
3001         extent_changeset_release(&changeset);
3002         return ret;
3003 }
3004
3005 static int __btrfs_qgroup_release_data(struct inode *inode,
3006                         struct extent_changeset *reserved, u64 start, u64 len,
3007                         int free)
3008 {
3009         struct extent_changeset changeset;
3010         int trace_op = QGROUP_RELEASE;
3011         int ret;
3012
3013         /* In the release case, we shouldn't have @reserved */
3014         WARN_ON(!free && reserved);
3015         if (free && reserved)
3016                 return qgroup_free_reserved_data(inode, reserved, start, len);
3017         extent_changeset_init(&changeset);
3018         ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
3019                         start + len - 1, EXTENT_QGROUP_RESERVED, &changeset);
3020         if (ret < 0)
3021                 goto out;
3022
3023         if (free)
3024                 trace_op = QGROUP_FREE;
3025         trace_btrfs_qgroup_release_data(inode, start, len,
3026                                         changeset.bytes_changed, trace_op);
3027         if (free)
3028                 btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
3029                                 BTRFS_I(inode)->root->objectid,
3030                                 changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
3031         ret = changeset.bytes_changed;
3032 out:
3033         extent_changeset_release(&changeset);
3034         return ret;
3035 }
3036
3037 /*
3038  * Free a reserved space range from io_tree and related qgroups
3039  *
3040  * Should be called when a range of pages gets invalidated before reaching
3041  * disk, or for the error cleanup case.
3042  * If @reserved is given, only the reserved range in [@start, @start + @len)
3043  * will be freed.
3044  *
3045  * For data written to disk, use btrfs_qgroup_release_data().
3046  *
3047  * NOTE: This function may sleep for memory allocation.
3048  */
3049 int btrfs_qgroup_free_data(struct inode *inode,
3050                         struct extent_changeset *reserved, u64 start, u64 len)
3051 {
3052         return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
3053 }
3054
3055 /*
3056  * Release a reserved space range from io_tree only.
3057  *
3058  * Should be called when a range of pages gets written to disk and the
3059  * corresponding FILE_EXTENT item is inserted into the corresponding root.
3060  *
3061  * Since the new qgroup accounting framework only updates qgroup numbers at
3062  * commit_transaction() time, the reserved space shouldn't be freed from the
3063  * related qgroups.
3064  *
3065  * But we should release the range from the io_tree, to allow further writes
3066  * to be COWed.
3067  *
3068  * NOTE: This function may sleep for memory allocation.
3069  */
3070 int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
3071 {
3072         return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
3073 }
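
Putting the two entry points side by side, the success/error split documented above could be wrapped like this (my_finish_write() is a hypothetical helper, not existing code):

/*
 * Hypothetical helper contrasting the two paths: on error the data
 * never reached disk so the reservation goes back to the qgroups; on
 * success only the io_tree bits are cleared and the reservation stays
 * until the transaction commit accounts it.
 */
static void my_finish_write(struct inode *inode,
			    struct extent_changeset *reserved,
			    u64 start, u64 len, int err)
{
	if (err)
		btrfs_qgroup_free_data(inode, reserved, start, len);
	else
		btrfs_qgroup_release_data(inode, start, len);
}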
3074
3075 int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
3076                               bool enforce)
3077 {
3078         struct btrfs_fs_info *fs_info = root->fs_info;
3079         int ret;
3080
3081         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
3082             !is_fstree(root->objectid) || num_bytes == 0)
3083                 return 0;
3084
3085         BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
3086         trace_qgroup_meta_reserve(root, (s64)num_bytes);
3087         ret = qgroup_reserve(root, num_bytes, enforce, BTRFS_QGROUP_RSV_META);
3088         if (ret < 0)
3089                 return ret;
3090         atomic64_add(num_bytes, &root->qgroup_meta_rsv);
3091         return ret;
3092 }
3093
3094 void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
3095 {
3096         struct btrfs_fs_info *fs_info = root->fs_info;
3097         u64 reserved;
3098
3099         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
3100             !is_fstree(root->objectid))
3101                 return;
3102
3103         reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
3104         if (reserved == 0)
3105                 return;
3106         trace_qgroup_meta_reserve(root, -(s64)reserved);
3107         btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved,
3108                                   BTRFS_QGROUP_RSV_META);
3109 }
3110
3111 void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
3112 {
3113         struct btrfs_fs_info *fs_info = root->fs_info;
3114
3115         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
3116             !is_fstree(root->objectid))
3117                 return;
3118
3119         BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
3120         WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
3121         atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
3122         trace_qgroup_meta_reserve(root, -(s64)num_bytes);
3123         btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes,
3124                                   BTRFS_QGROUP_RSV_META);
3125 }
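
A minimal sketch of the reserve/free pairing for metadata follows; my_reserve_tree_blocks() and my_use_tree_blocks() are hypothetical names. Both helpers above insist on nodesize-aligned sizes (see the BUG_ONs), so the byte count is derived from a block count here.

/* Hypothetical consumer; stands in for real tree-block allocation. */
static int my_use_tree_blocks(struct btrfs_root *root, int nr_blocks)
{
	return 0;
}

static int my_reserve_tree_blocks(struct btrfs_root *root, int nr_blocks)
{
	int num_bytes = nr_blocks * root->fs_info->nodesize;
	int ret;

	ret = btrfs_qgroup_reserve_meta(root, num_bytes, true);
	if (ret < 0)
		return ret;	/* e.g. -EDQUOT when enforcement refuses */

	ret = my_use_tree_blocks(root, nr_blocks);
	if (ret < 0)
		/* Error path: hand the reserved bytes back. */
		btrfs_qgroup_free_meta(root, num_bytes);
	return ret;
}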
3126
3127 /*
3128  * Check for leaked qgroup reserved space, normally at inode destroy
3129  * time.
3130  */
3131 void btrfs_qgroup_check_reserved_leak(struct inode *inode)
3132 {
3133         struct extent_changeset changeset;
3134         struct ulist_node *unode;
3135         struct ulist_iterator iter;
3136         int ret;
3137
3138         extent_changeset_init(&changeset);
3139         ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
3140                         EXTENT_QGROUP_RESERVED, &changeset);
3141
3142         WARN_ON(ret < 0);
3143         if (WARN_ON(changeset.bytes_changed)) {
3144                 ULIST_ITER_INIT(&iter);
3145                 while ((unode = ulist_next(&changeset.range_changed, &iter))) {
3146                         btrfs_warn(BTRFS_I(inode)->root->fs_info,
3147                                 "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
3148                                 inode->i_ino, unode->val, unode->aux);
3149                 }
3150                 btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
3151                                 BTRFS_I(inode)->root->objectid,
3152                                 changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
3153
3154         }
3155         extent_changeset_release(&changeset);
3156 }
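
For reference, the expected call site for this check is inode teardown; in this era the real caller is btrfs_destroy_inode() in inode.c. A minimal sketch with a hypothetical hook name, teardown details elided:

/* Hypothetical teardown hook; real teardown steps are elided. */
static void my_destroy_inode(struct inode *inode)
{
	/* ... drop ordered extents, delayed items, etc. ... */

	/* Warn about, and forcibly release, any leaked qgroup rsv. */
	btrfs_qgroup_check_reserved_leak(inode);
}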