btrfs: qgroup: account shared subtrees during snapshot delete
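During snapshot delete, the tree walk skips subtrees that are still
shared with other roots, so the extents in them were never run
through qgroup accounting.  Queue a new BTRFS_QGROUP_OPER_SUB_SUBTREE
operation for each extent in such a subtree.  At accounting time we
look up all roots still referencing the extent: if a single root
remains and it is not the root being deleted, the extent has become
exclusively owned by that root, so its exclusive counts (and those of
all its parent qgroups) are increased by the extent size.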
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 98cb6b2630f9aac60972b5aea3c372e9e99eeba5..65b62c467e28e2ce7ae0f599a57b844c8fa30cd8 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1201,6 +1201,55 @@ out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
 }
+
+static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
+                          struct btrfs_qgroup_operation *oper2)
+{
+       /*
+        * Ignore seq and type here; we're looking for any operation
+        * at all related to this extent on that root.
+        */
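+       /*
+        * E.g. a pending add or sub operation for this bytenr on this
+        * root compares equal to a SUB_SUBTREE operation here, even
+        * though their seq and type fields differ.
+        */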
+       if (oper1->bytenr < oper2->bytenr)
+               return -1;
+       if (oper1->bytenr > oper2->bytenr)
+               return 1;
+       if (oper1->ref_root < oper2->ref_root)
+               return -1;
+       if (oper1->ref_root > oper2->ref_root)
+               return 1;
+       return 0;
+}
+
+static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
+                             struct btrfs_qgroup_operation *oper)
+{
+       struct rb_node *n;
+       struct btrfs_qgroup_operation *cur;
+       int cmp;
+
+       spin_lock(&fs_info->qgroup_op_lock);
+       n = fs_info->qgroup_op_tree.rb_node;
+       while (n) {
+               cur = rb_entry(n, struct btrfs_qgroup_operation, n);
+               cmp = comp_oper_exist(cur, oper);
+               if (cmp < 0) {
+                       n = n->rb_right;
+               } else if (cmp) {
+                       n = n->rb_left;
+               } else {
+                       spin_unlock(&fs_info->qgroup_op_lock);
+                       return -EEXIST;
+               }
+       }
+       spin_unlock(&fs_info->qgroup_op_lock);
+       return 0;
+}
+
 static int comp_oper(struct btrfs_qgroup_operation *oper1,
                     struct btrfs_qgroup_operation *oper2)
 {
@@ -1290,6 +1339,23 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
        oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
        INIT_LIST_HEAD(&oper->elem.list);
        oper->elem.seq = 0;
+
+       if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
+               /*
+                * If any operation for this bytenr/ref_root combo
+                * exists, then we know it's not exclusively owned and
+                * shouldn't be queued up.
+                *
+                * This also catches the case where we have a cloned
+                * extent that gets queued up multiple times during
+                * drop snapshot.
+                */
+               if (qgroup_oper_exists(fs_info, oper)) {
+                       kfree(oper);
+                       return 0;
+               }
+       }
+
        ret = insert_qgroup_oper(fs_info, oper);
        if (ret) {
                /* Shouldn't happen so have an assert for developers */
@@ -1883,6 +1949,122 @@ out:
        return ret;
 }
 
+/*
+ * Process a reference to a shared subtree. This type of operation is
+ * queued during snapshot removal when we encounter extents which are
+ * shared between more than one root.
+ */
+static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
+                                    struct btrfs_fs_info *fs_info,
+                                    struct btrfs_qgroup_operation *oper)
+{
+       struct ulist *roots = NULL;
+       struct ulist_node *unode;
+       struct ulist_iterator uiter;
+       struct btrfs_qgroup_list *glist;
+       struct ulist *parents;
+       int ret = 0;
+       struct btrfs_qgroup *qg;
+       u64 root_obj = 0;
+       struct seq_list elem = {};
+
+       parents = ulist_alloc(GFP_NOFS);
+       if (!parents)
+               return -ENOMEM;
+
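+       /*
+        * Grab a tree mod log sequence number so that the backref
+        * walk in btrfs_find_all_roots() sees a consistent set of
+        * roots for this extent.
+        */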
+       btrfs_get_tree_mod_seq(fs_info, &elem);
+       ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
+                                  elem.seq, &roots);
+       btrfs_put_tree_mod_seq(fs_info, &elem);
+       if (ret < 0)
+               goto out;
+
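+       /*
+        * Zero or more than one remaining root means the extent is
+        * either fully gone or still shared; either way there are no
+        * exclusive counts to update here.
+        */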
+       if (roots->nnodes != 1)
+               goto out;
+
+       ULIST_ITER_INIT(&uiter);
+       unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
+       /*
+        * If we find our ref root then that means all refs
+        * this extent has to the root have not yet been
+        * deleted. In that case, we do nothing and let the
+        * last ref for this bytenr drive our update.
+        *
+        * This can happen for example if an extent is
+        * referenced multiple times in a snapshot (clone,
+        * etc). If we are in the middle of snapshot removal,
+        * queued updates for such an extent will find the
+        * root if we have not yet finished removing the
+        * snapshot.
+        */
+       if (unode->val == oper->ref_root)
+               goto out;
+
+       root_obj = unode->val;
+       BUG_ON(!root_obj);
+
+       spin_lock(&fs_info->qgroup_lock);
+       qg = find_qgroup_rb(fs_info, root_obj);
+       if (!qg)
+               goto out_unlock;
+
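+       /* The extent is now exclusive to this root; count it as such. */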
+       qg->excl += oper->num_bytes;
+       qg->excl_cmpr += oper->num_bytes;
+       qgroup_dirty(fs_info, qg);
+
+       /*
+        * Adjust counts for parent groups. First we find all
+        * parents, then in the 2nd loop we do the adjustment
+        * while adding parents of the parents to our ulist.
+        */
+       list_for_each_entry(glist, &qg->groups, next_group) {
+               ret = ulist_add(parents, glist->group->qgroupid,
+                               ptr_to_u64(glist->group), GFP_ATOMIC);
+               if (ret < 0)
+                       goto out_unlock;
+       }
+
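+       /*
+        * ulist_add() ignores entries that are already present, so
+        * each parent qgroup is visited only once even if the
+        * inheritance graph contains diamonds.
+        */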
+       ULIST_ITER_INIT(&uiter);
+       while ((unode = ulist_next(parents, &uiter))) {
+               qg = u64_to_ptr(unode->aux);
+               qg->excl += oper->num_bytes;
+               qg->excl_cmpr += oper->num_bytes;
+               qgroup_dirty(fs_info, qg);
+
+               /* Add any parents of the parents */
+               list_for_each_entry(glist, &qg->groups, next_group) {
+                       ret = ulist_add(parents, glist->group->qgroupid,
+                                       ptr_to_u64(glist->group), GFP_ATOMIC);
+                       if (ret < 0)
+                               goto out_unlock;
+               }
+       }
+
+out_unlock:
+       spin_unlock(&fs_info->qgroup_lock);
+
+out:
+       ulist_free(roots);
+       ulist_free(parents);
+       return ret;
+}
+
 /*
  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
  * from the fs. First, all roots referencing the extent are searched, and
@@ -1920,6 +2102,9 @@ static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
        case BTRFS_QGROUP_OPER_SUB_SHARED:
                ret = qgroup_shared_accounting(trans, fs_info, oper);
                break;
+       case BTRFS_QGROUP_OPER_SUB_SUBTREE:
+               ret = qgroup_subtree_accounting(trans, fs_info, oper);
+               break;
        default:
                ASSERT(0);
        }
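
For context, a minimal sketch of the caller side: the full commit
queues these operations from the shared-subtree walk it adds in
extent-tree.c.  The helper name below is illustrative, assuming this
series' btrfs_qgroup_record_ref() signature (ref_root, bytenr,
num_bytes, type, mod_seq):

static int record_subtree_extent(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 u64 bytenr, u64 num_bytes)
{
        /*
         * Queue a SUB_SUBTREE operation for an extent found in a
         * subtree shared by the root being deleted; duplicates are
         * caught by qgroup_oper_exists() above.
         */
        return btrfs_qgroup_record_ref(trans, root->fs_info,
                                       root->root_key.objectid,
                                       bytenr, num_bytes,
                                       BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
}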