btrfs: Fix lock release order
[sfrench/cifs-2.6.git] / fs / btrfs / extent-tree.c
index e08d0d45af4f3d31513107cbb4deaa7d5c244c24..cd2f5220577fbb466c3926dd12222e899c94c5f4 100644 (file)
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2007 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
  */
+
 #include <linux/sched.h>
 #include <linux/sched/signal.h>
 #include <linux/pagemap.h>
@@ -756,12 +744,12 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 }
 
 static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
-                            u64 owner, u64 root_objectid)
+                            bool metadata, u64 root_objectid)
 {
        struct btrfs_space_info *space_info;
        u64 flags;
 
-       if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+       if (metadata) {
                if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
                        flags = BTRFS_BLOCK_GROUP_SYSTEM;
                else
@@ -2212,8 +2200,11 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                                                 &old_ref_mod, &new_ref_mod);
        }
 
-       if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
-               add_pinned_bytes(fs_info, -num_bytes, owner, root_objectid);
+       if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) {
+               bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+
+               add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid);
+       }
 
        return ret;
 }
@@ -2606,20 +2597,26 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
        delayed_refs->num_heads--;
        rb_erase(&head->href_node, &delayed_refs->href_root);
        RB_CLEAR_NODE(&head->href_node);
-       spin_unlock(&delayed_refs->lock);
        spin_unlock(&head->lock);
+       spin_unlock(&delayed_refs->lock);
        atomic_dec(&delayed_refs->num_entries);
 
        trace_run_delayed_ref_head(fs_info, head, 0);
 
        if (head->total_ref_mod < 0) {
-               struct btrfs_block_group_cache *cache;
+               struct btrfs_space_info *space_info;
+               u64 flags;
 
-               cache = btrfs_lookup_block_group(fs_info, head->bytenr);
-               ASSERT(cache);
-               percpu_counter_add(&cache->space_info->total_bytes_pinned,
+               if (head->is_data)
+                       flags = BTRFS_BLOCK_GROUP_DATA;
+               else if (head->is_system)
+                       flags = BTRFS_BLOCK_GROUP_SYSTEM;
+               else
+                       flags = BTRFS_BLOCK_GROUP_METADATA;
+               space_info = __find_space_info(fs_info, flags);
+               ASSERT(space_info);
+               percpu_counter_add(&space_info->total_bytes_pinned,
                                   -head->num_bytes);
-               btrfs_put_block_group(cache);
 
                if (head->is_data) {
                        spin_lock(&delayed_refs->lock);
@@ -2716,7 +2713,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                ref = select_delayed_ref(locked_ref);
 
                if (ref && ref->seq &&
-                   btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
+                   btrfs_check_delayed_seq(fs_info, ref->seq)) {
                        spin_unlock(&locked_ref->lock);
                        unselect_delayed_ref_head(delayed_refs, locked_ref);
                        locked_ref = NULL;
@@ -3148,7 +3145,11 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
        struct rb_node *node;
        int ret = 0;
 
+       spin_lock(&root->fs_info->trans_lock);
        cur_trans = root->fs_info->running_transaction;
+       if (cur_trans)
+               refcount_inc(&cur_trans->use_count);
+       spin_unlock(&root->fs_info->trans_lock);
        if (!cur_trans)
                return 0;
 
@@ -3157,6 +3158,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
        head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
        if (!head) {
                spin_unlock(&delayed_refs->lock);
+               btrfs_put_transaction(cur_trans);
                return 0;
        }
 
@@ -3173,6 +3175,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
                mutex_lock(&head->mutex);
                mutex_unlock(&head->mutex);
                btrfs_put_delayed_ref_head(head);
+               btrfs_put_transaction(cur_trans);
                return -EAGAIN;
        }
        spin_unlock(&delayed_refs->lock);
@@ -3205,6 +3208,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
        }
        spin_unlock(&head->lock);
        mutex_unlock(&head->mutex);
+       btrfs_put_transaction(cur_trans);
        return ret;
 }
 
@@ -4642,6 +4646,7 @@ again:
        if (wait_for_alloc) {
                mutex_unlock(&fs_info->chunk_mutex);
                wait_for_alloc = 0;
+               cond_resched();
                goto again;
        }
 
@@ -5570,14 +5575,18 @@ again:
 
 static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                                    struct btrfs_block_rsv *block_rsv,
-                                   struct btrfs_block_rsv *dest, u64 num_bytes)
+                                   struct btrfs_block_rsv *dest, u64 num_bytes,
+                                   u64 *qgroup_to_release_ret)
 {
        struct btrfs_space_info *space_info = block_rsv->space_info;
+       u64 qgroup_to_release = 0;
        u64 ret;
 
        spin_lock(&block_rsv->lock);
-       if (num_bytes == (u64)-1)
+       if (num_bytes == (u64)-1) {
                num_bytes = block_rsv->size;
+               qgroup_to_release = block_rsv->qgroup_rsv_size;
+       }
        block_rsv->size -= num_bytes;
        if (block_rsv->reserved >= block_rsv->size) {
                num_bytes = block_rsv->reserved - block_rsv->size;
@@ -5586,6 +5595,13 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
        } else {
                num_bytes = 0;
        }
+       if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+               qgroup_to_release = block_rsv->qgroup_rsv_reserved -
+                                   block_rsv->qgroup_rsv_size;
+               block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
+       } else {
+               qgroup_to_release = 0;
+       }
        spin_unlock(&block_rsv->lock);
 
        ret = num_bytes;
@@ -5608,6 +5624,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                        space_info_add_old_bytes(fs_info, space_info,
                                                 num_bytes);
        }
+       if (qgroup_to_release_ret)
+               *qgroup_to_release_ret = qgroup_to_release;
        return ret;
 }
 
@@ -5749,17 +5767,21 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
        struct btrfs_root *root = inode->root;
        struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
        u64 num_bytes = 0;
+       u64 qgroup_num_bytes = 0;
        int ret = -ENOSPC;
 
        spin_lock(&block_rsv->lock);
        if (block_rsv->reserved < block_rsv->size)
                num_bytes = block_rsv->size - block_rsv->reserved;
+       if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
+               qgroup_num_bytes = block_rsv->qgroup_rsv_size -
+                                  block_rsv->qgroup_rsv_reserved;
        spin_unlock(&block_rsv->lock);
 
        if (num_bytes == 0)
                return 0;
 
-       ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+       ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
        if (ret)
                return ret;
        ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
@@ -5767,7 +5789,13 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
                block_rsv_add_bytes(block_rsv, num_bytes, 0);
                trace_btrfs_space_reservation(root->fs_info, "delalloc",
                                              btrfs_ino(inode), num_bytes, 1);
-       }
+
+               /* Don't forget to increase qgroup_rsv_reserved */
+               spin_lock(&block_rsv->lock);
+               block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
+               spin_unlock(&block_rsv->lock);
+       } else
+               btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
        return ret;
 }
 
@@ -5788,20 +5816,23 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
        struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
        struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
        u64 released = 0;
+       u64 qgroup_to_release = 0;
 
        /*
         * Since we statically set the block_rsv->size we just want to say we
         * are releasing 0 bytes, and then we'll just get the reservation over
         * the size free'd.
         */
-       released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0);
+       released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
+                                          &qgroup_to_release);
        if (released > 0)
                trace_btrfs_space_reservation(fs_info, "delalloc",
                                              btrfs_ino(inode), released, 0);
        if (qgroup_free)
-               btrfs_qgroup_free_meta_prealloc(inode->root, released);
+               btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
        else
-               btrfs_qgroup_convert_reserved_meta(inode->root, released);
+               btrfs_qgroup_convert_reserved_meta(inode->root,
+                                                  qgroup_to_release);
 }
 
 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
@@ -5813,7 +5844,7 @@ void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
        if (global_rsv == block_rsv ||
            block_rsv->space_info != global_rsv->space_info)
                global_rsv = NULL;
-       block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes);
+       block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL);
 }
 
 static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
@@ -5893,7 +5924,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
 static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 {
        block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
-                               (u64)-1);
+                               (u64)-1, NULL);
        WARN_ON(fs_info->trans_block_rsv.size > 0);
        WARN_ON(fs_info->trans_block_rsv.reserved > 0);
        WARN_ON(fs_info->chunk_block_rsv.size > 0);
@@ -5917,7 +5948,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
        WARN_ON_ONCE(!list_empty(&trans->new_bgs));
 
        block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
-                               trans->chunk_bytes_reserved);
+                               trans->chunk_bytes_reserved, NULL);
        trans->chunk_bytes_reserved = 0;
 }
 
@@ -6022,6 +6053,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 {
        struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
        u64 reserve_size = 0;
+       u64 qgroup_rsv_size = 0;
        u64 csum_leaves;
        unsigned outstanding_extents;
 
@@ -6034,9 +6066,17 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
                                                 inode->csum_bytes);
        reserve_size += btrfs_calc_trans_metadata_size(fs_info,
                                                       csum_leaves);
+       /*
+        * For qgroup rsv, the calculation is very simple:
+        * account one nodesize for each outstanding extent
+        *
+        * This is overestimating in most cases.
+        */
+       qgroup_rsv_size = outstanding_extents * fs_info->nodesize;
 
        spin_lock(&block_rsv->lock);
        block_rsv->size = reserve_size;
+       block_rsv->qgroup_rsv_size = qgroup_rsv_size;
        spin_unlock(&block_rsv->lock);
 }
 
@@ -7229,7 +7269,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
        }
 out:
        if (pin)
-               add_pinned_bytes(fs_info, buf->len, btrfs_header_level(buf),
+               add_pinned_bytes(fs_info, buf->len, true,
                                 root->root_key.objectid);
 
        if (last_ref) {
@@ -7283,8 +7323,11 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
                                                 &old_ref_mod, &new_ref_mod);
        }
 
-       if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
-               add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);
+       if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) {
+               bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+
+               add_pinned_bytes(fs_info, num_bytes, metadata, root_objectid);
+       }
 
        return ret;
 }
@@ -8414,7 +8457,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
                            struct btrfs_block_rsv *block_rsv, u32 blocksize)
 {
        block_rsv_add_bytes(block_rsv, blocksize, 0);
-       block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
+       block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
 }
 
 /*