Merge tag 'pinctrl-v4.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw...
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0dcbbeacaadc7d91e7fd9dd3cc1dfb4617db82b0..673ac4e01dd07ddb788805a9fad631a2656b5d7f 100644
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
 #include <linux/percpu_counter.h>
+#include <linux/lockdep.h>
 #include "hash.h"
 #include "tree-log.h"
 #include "disk-io.h"
@@ -912,7 +913,7 @@ search_again:
        head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
        if (head) {
                if (!mutex_trylock(&head->mutex)) {
-                       refcount_inc(&head->node.refs);
+                       refcount_inc(&head->refs);
                        spin_unlock(&delayed_refs->lock);
 
                        btrfs_release_path(path);
@@ -923,7 +924,7 @@ search_again:
                         */
                        mutex_lock(&head->mutex);
                        mutex_unlock(&head->mutex);
-                       btrfs_put_delayed_ref(&head->node);
+                       btrfs_put_delayed_ref_head(head);
                        goto search_again;
                }
                spin_lock(&head->lock);
@@ -932,7 +933,7 @@ search_again:
                else
                        BUG_ON(num_refs == 0);
 
-               num_refs += head->node.ref_mod;
+               num_refs += head->ref_mod;
                spin_unlock(&head->lock);
                mutex_unlock(&head->mutex);
        }
@@ -2337,7 +2338,7 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
 
 static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
                                 struct btrfs_fs_info *fs_info,
-                                struct btrfs_delayed_ref_node *node,
+                                struct btrfs_delayed_ref_head *head,
                                 struct btrfs_delayed_extent_op *extent_op)
 {
        struct btrfs_key key;
@@ -2359,14 +2360,14 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
-       key.objectid = node->bytenr;
+       key.objectid = head->bytenr;
 
        if (metadata) {
                key.type = BTRFS_METADATA_ITEM_KEY;
                key.offset = extent_op->level;
        } else {
                key.type = BTRFS_EXTENT_ITEM_KEY;
-               key.offset = node->num_bytes;
+               key.offset = head->num_bytes;
        }
 
 again:
@@ -2383,17 +2384,17 @@ again:
                                path->slots[0]--;
                                btrfs_item_key_to_cpu(path->nodes[0], &key,
                                                      path->slots[0]);
-                               if (key.objectid == node->bytenr &&
+                               if (key.objectid == head->bytenr &&
                                    key.type == BTRFS_EXTENT_ITEM_KEY &&
-                                   key.offset == node->num_bytes)
+                                   key.offset == head->num_bytes)
                                        ret = 0;
                        }
                        if (ret > 0) {
                                btrfs_release_path(path);
                                metadata = 0;
 
-                               key.objectid = node->bytenr;
-                               key.offset = node->num_bytes;
+                               key.objectid = head->bytenr;
+                               key.offset = head->num_bytes;
                                key.type = BTRFS_EXTENT_ITEM_KEY;
                                goto again;
                        }
@@ -2500,44 +2501,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
                return 0;
        }
 
-       if (btrfs_delayed_ref_is_head(node)) {
-               struct btrfs_delayed_ref_head *head;
-               /*
-                * we've hit the end of the chain and we were supposed
-                * to insert this extent into the tree.  But, it got
-                * deleted before we ever needed to insert it, so all
-                * we have to do is clean up the accounting
-                */
-               BUG_ON(extent_op);
-               head = btrfs_delayed_node_to_head(node);
-               trace_run_delayed_ref_head(fs_info, node, head, node->action);
-
-               if (head->total_ref_mod < 0) {
-                       struct btrfs_block_group_cache *cache;
-
-                       cache = btrfs_lookup_block_group(fs_info, node->bytenr);
-                       ASSERT(cache);
-                       percpu_counter_add(&cache->space_info->total_bytes_pinned,
-                                          -node->num_bytes);
-                       btrfs_put_block_group(cache);
-               }
-
-               if (insert_reserved) {
-                       btrfs_pin_extent(fs_info, node->bytenr,
-                                        node->num_bytes, 1);
-                       if (head->is_data) {
-                               ret = btrfs_del_csums(trans, fs_info,
-                                                     node->bytenr,
-                                                     node->num_bytes);
-                       }
-               }
-
-               /* Also free its reserved qgroup space */
-               btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
-                                             head->qgroup_reserved);
-               return ret;
-       }
-
        if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
            node->type == BTRFS_SHARED_BLOCK_REF_KEY)
                ret = run_delayed_tree_ref(trans, fs_info, node, extent_op,
@@ -2556,7 +2519,7 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
 {
        struct btrfs_delayed_ref_node *ref;
 
-       if (list_empty(&head->ref_list))
+       if (RB_EMPTY_ROOT(&head->ref_tree))
                return NULL;
 
        /*
@@ -2569,8 +2532,8 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
                return list_first_entry(&head->ref_add_list,
                                struct btrfs_delayed_ref_node, add_list);
 
-       ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
-                              list);
+       ref = rb_entry(rb_first(&head->ref_tree),
+                      struct btrfs_delayed_ref_node, ref_node);
        ASSERT(list_empty(&ref->add_list));
        return ref;
 }
@@ -2600,7 +2563,7 @@ static int cleanup_extent_op(struct btrfs_trans_handle *trans,
                return 0;
        }
        spin_unlock(&head->lock);
-       ret = run_delayed_extent_op(trans, fs_info, &head->node, extent_op);
+       ret = run_delayed_extent_op(trans, fs_info, head, extent_op);
        btrfs_free_delayed_extent_op(extent_op);
        return ret ? ret : 1;
 }
@@ -2630,15 +2593,50 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
        spin_unlock(&head->lock);
        spin_lock(&delayed_refs->lock);
        spin_lock(&head->lock);
-       if (!list_empty(&head->ref_list) || head->extent_op) {
+       if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) {
                spin_unlock(&head->lock);
                spin_unlock(&delayed_refs->lock);
                return 1;
        }
-       head->node.in_tree = 0;
        delayed_refs->num_heads--;
        rb_erase(&head->href_node, &delayed_refs->href_root);
+       RB_CLEAR_NODE(&head->href_node);
        spin_unlock(&delayed_refs->lock);
+       spin_unlock(&head->lock);
+       atomic_dec(&delayed_refs->num_entries);
+
+       trace_run_delayed_ref_head(fs_info, head, 0);
+
+       if (head->total_ref_mod < 0) {
+               struct btrfs_block_group_cache *cache;
+
+               cache = btrfs_lookup_block_group(fs_info, head->bytenr);
+               ASSERT(cache);
+               percpu_counter_add(&cache->space_info->total_bytes_pinned,
+                                  -head->num_bytes);
+               btrfs_put_block_group(cache);
+
+               if (head->is_data) {
+                       spin_lock(&delayed_refs->lock);
+                       delayed_refs->pending_csums -= head->num_bytes;
+                       spin_unlock(&delayed_refs->lock);
+               }
+       }
+
+       if (head->must_insert_reserved) {
+               btrfs_pin_extent(fs_info, head->bytenr,
+                                head->num_bytes, 1);
+               if (head->is_data) {
+                       ret = btrfs_del_csums(trans, fs_info, head->bytenr,
+                                             head->num_bytes);
+               }
+       }
+
+       /* Also free its reserved qgroup space */
+       btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
+                                     head->qgroup_reserved);
+       btrfs_delayed_ref_unlock(head);
+       btrfs_put_delayed_ref_head(head);
        return 0;
 }
 
@@ -2722,6 +2720,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                        continue;
                }
 
+               /*
+                * We're done processing refs in this ref_head, clean everything
+                * up and move on to the next ref_head.
+                */
                if (!ref) {
                        ret = cleanup_ref_head(trans, fs_info, locked_ref);
                        if (ret > 0 ) {
@@ -2731,34 +2733,31 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                        } else if (ret) {
                                return ret;
                        }
+                       locked_ref = NULL;
+                       count++;
+                       continue;
+               }
 
-                       /*
-                        * All delayed refs have been processed, Go ahead and
-                        * send the head node to run_one_delayed_ref, so that
-                        * any accounting fixes can happen
-                        */
-                       ref = &locked_ref->node;
-               } else {
-                       actual_count++;
-                       ref->in_tree = 0;
-                       list_del(&ref->list);
-                       if (!list_empty(&ref->add_list))
-                               list_del(&ref->add_list);
-                       /*
-                        * when we play the delayed ref, also correct the
-                        * ref_mod on head
-                        */
-                       switch (ref->action) {
-                       case BTRFS_ADD_DELAYED_REF:
-                       case BTRFS_ADD_DELAYED_EXTENT:
-                               locked_ref->node.ref_mod -= ref->ref_mod;
-                               break;
-                       case BTRFS_DROP_DELAYED_REF:
-                               locked_ref->node.ref_mod += ref->ref_mod;
-                               break;
-                       default:
-                               WARN_ON(1);
-                       }
+               actual_count++;
+               ref->in_tree = 0;
+               rb_erase(&ref->ref_node, &locked_ref->ref_tree);
+               RB_CLEAR_NODE(&ref->ref_node);
+               if (!list_empty(&ref->add_list))
+                       list_del(&ref->add_list);
+               /*
+                * When we play the delayed ref, also correct the ref_mod on
+                * head
+                */
+               switch (ref->action) {
+               case BTRFS_ADD_DELAYED_REF:
+               case BTRFS_ADD_DELAYED_EXTENT:
+                       locked_ref->ref_mod -= ref->ref_mod;
+                       break;
+               case BTRFS_DROP_DELAYED_REF:
+                       locked_ref->ref_mod += ref->ref_mod;
+                       break;
+               default:
+                       WARN_ON(1);
                }
                atomic_dec(&delayed_refs->num_entries);
 
@@ -2785,22 +2784,6 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                        return ret;
                }
 
-               /*
-                * If this node is a head, that means all the refs in this head
-                * have been dealt with, and we will pick the next head to deal
-                * with, so we must unlock the head and drop it from the cluster
-                * list before we release it.
-                */
-               if (btrfs_delayed_ref_is_head(ref)) {
-                       if (locked_ref->is_data &&
-                           locked_ref->total_ref_mod < 0) {
-                               spin_lock(&delayed_refs->lock);
-                               delayed_refs->pending_csums -= ref->num_bytes;
-                               spin_unlock(&delayed_refs->lock);
-                       }
-                       btrfs_delayed_ref_unlock(locked_ref);
-                       locked_ref = NULL;
-               }
                btrfs_put_delayed_ref(ref);
                count++;
                cond_resched();
@@ -3104,33 +3087,16 @@ again:
                        spin_unlock(&delayed_refs->lock);
                        goto out;
                }
+               head = rb_entry(node, struct btrfs_delayed_ref_head,
+                               href_node);
+               refcount_inc(&head->refs);
+               spin_unlock(&delayed_refs->lock);
 
-               while (node) {
-                       head = rb_entry(node, struct btrfs_delayed_ref_head,
-                                       href_node);
-                       if (btrfs_delayed_ref_is_head(&head->node)) {
-                               struct btrfs_delayed_ref_node *ref;
-
-                               ref = &head->node;
-                               refcount_inc(&ref->refs);
-
-                               spin_unlock(&delayed_refs->lock);
-                               /*
-                                * Mutex was contended, block until it's
-                                * released and try again
-                                */
-                               mutex_lock(&head->mutex);
-                               mutex_unlock(&head->mutex);
+               /* Mutex was contended, block until it's released and retry. */
+               mutex_lock(&head->mutex);
+               mutex_unlock(&head->mutex);
 
-                               btrfs_put_delayed_ref(ref);
-                               cond_resched();
-                               goto again;
-                       } else {
-                               WARN_ON(1);
-                       }
-                       node = rb_next(node);
-               }
-               spin_unlock(&delayed_refs->lock);
+               btrfs_put_delayed_ref_head(head);
                cond_resched();
                goto again;
        }
@@ -3173,6 +3139,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
        struct btrfs_delayed_data_ref *data_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_transaction *cur_trans;
+       struct rb_node *node;
        int ret = 0;
 
        cur_trans = root->fs_info->running_transaction;
@@ -3188,7 +3155,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
        }
 
        if (!mutex_trylock(&head->mutex)) {
-               refcount_inc(&head->node.refs);
+               refcount_inc(&head->refs);
                spin_unlock(&delayed_refs->lock);
 
                btrfs_release_path(path);
@@ -3199,13 +3166,18 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
                 */
                mutex_lock(&head->mutex);
                mutex_unlock(&head->mutex);
-               btrfs_put_delayed_ref(&head->node);
+               btrfs_put_delayed_ref_head(head);
                return -EAGAIN;
        }
        spin_unlock(&delayed_refs->lock);
 
        spin_lock(&head->lock);
-       list_for_each_entry(ref, &head->ref_list, list) {
+       /*
+        * XXX: We should replace this with a proper search function in the
+        * future.
+        */
+       for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
+               ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
                /* If it's a shared ref we know a cross reference exists */
                if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
                        ret = 1;
@@ -4847,7 +4819,6 @@ static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
 static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
                            u64 orig, bool wait_ordered)
 {
-       struct btrfs_block_rsv *block_rsv;
        struct btrfs_space_info *space_info;
        struct btrfs_trans_handle *trans;
        u64 delalloc_bytes;
@@ -4863,8 +4834,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
        to_reclaim = items * EXTENT_SIZE_PER_ITEM;
 
        trans = (struct btrfs_trans_handle *)current->journal_info;
-       block_rsv = &fs_info->delalloc_block_rsv;
-       space_info = block_rsv->space_info;
+       space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
 
        delalloc_bytes = percpu_counter_sum_positive(
                                                &fs_info->delalloc_bytes);
@@ -5600,11 +5570,12 @@ again:
        }
 }
 
-static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
+static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                                    struct btrfs_block_rsv *block_rsv,
                                    struct btrfs_block_rsv *dest, u64 num_bytes)
 {
        struct btrfs_space_info *space_info = block_rsv->space_info;
+       u64 ret;
 
        spin_lock(&block_rsv->lock);
        if (num_bytes == (u64)-1)
@@ -5619,6 +5590,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
        }
        spin_unlock(&block_rsv->lock);
 
+       ret = num_bytes;
        if (num_bytes > 0) {
                if (dest) {
                        spin_lock(&dest->lock);
@@ -5638,6 +5610,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                        space_info_add_old_bytes(fs_info, space_info,
                                                 num_bytes);
        }
+       return ret;
 }
 
 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
@@ -5661,6 +5634,15 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
        rsv->type = type;
 }
 
+void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
+                                  struct btrfs_block_rsv *rsv,
+                                  unsigned short type)
+{
+       btrfs_init_block_rsv(rsv, type);
+       rsv->space_info = __find_space_info(fs_info,
+                                           BTRFS_BLOCK_GROUP_METADATA);
+}
+
 struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
                                              unsigned short type)
 {
@@ -5670,9 +5652,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
        if (!block_rsv)
                return NULL;
 
-       btrfs_init_block_rsv(block_rsv, type);
-       block_rsv->space_info = __find_space_info(fs_info,
-                                                 BTRFS_BLOCK_GROUP_METADATA);
+       btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
        return block_rsv;
 }
 
@@ -5755,6 +5735,66 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
        return ret;
 }
 
+/**
+ * btrfs_inode_rsv_refill - refill the inode block rsv.
+ * @inode - the inode we are refilling.
+ * @flush - the flushing restriction.
+ *
+ * Essentially the same as btrfs_block_rsv_refill, except it uses the
+ * block_rsv->size as the minimum size.  We'll either refill the missing amount
+ * or return if we already have enough space.  This will also handle the reserve
+ * tracepoint for the reserved amount.
+ */
+int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
+                          enum btrfs_reserve_flush_enum flush)
+{
+       struct btrfs_root *root = inode->root;
+       struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
+       u64 num_bytes = 0;
+       int ret = -ENOSPC;
+
+       spin_lock(&block_rsv->lock);
+       if (block_rsv->reserved < block_rsv->size)
+               num_bytes = block_rsv->size - block_rsv->reserved;
+       spin_unlock(&block_rsv->lock);
+
+       if (num_bytes == 0)
+               return 0;
+
+       ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
+       if (!ret) {
+               block_rsv_add_bytes(block_rsv, num_bytes, 0);
+               trace_btrfs_space_reservation(root->fs_info, "delalloc",
+                                             btrfs_ino(inode), num_bytes, 1);
+       }
+       return ret;
+}
+
+/**
+ * btrfs_inode_rsv_release - release any excess reservation.
+ * @inode - the inode we need to release from.
+ *
+ * This is the same as btrfs_block_rsv_release, except that it handles the
+ * tracepoint for the reservation.
+ */
+void btrfs_inode_rsv_release(struct btrfs_inode *inode)
+{
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+       struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
+       u64 released = 0;
+
+       /*
+        * Since we statically set the block_rsv->size we just want to say we
+        * are releasing 0 bytes, and then we'll just get the reservation over
+        * the size freed.
+        */
+       released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0);
+       if (released > 0)
+               trace_btrfs_space_reservation(fs_info, "delalloc",
+                                             btrfs_ino(inode), released, 0);
+}
+
 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
                             struct btrfs_block_rsv *block_rsv,
                             u64 num_bytes)
@@ -5826,7 +5866,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
 
        space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
        fs_info->global_block_rsv.space_info = space_info;
-       fs_info->delalloc_block_rsv.space_info = space_info;
        fs_info->trans_block_rsv.space_info = space_info;
        fs_info->empty_block_rsv.space_info = space_info;
        fs_info->delayed_block_rsv.space_info = space_info;
@@ -5846,8 +5885,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 {
        block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
                                (u64)-1);
-       WARN_ON(fs_info->delalloc_block_rsv.size > 0);
-       WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
        WARN_ON(fs_info->trans_block_rsv.size > 0);
        WARN_ON(fs_info->trans_block_rsv.reserved > 0);
        WARN_ON(fs_info->chunk_block_rsv.size > 0);
@@ -5859,12 +5896,15 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                  struct btrfs_fs_info *fs_info)
 {
-       if (!trans->block_rsv)
+       if (!trans->block_rsv) {
+               ASSERT(!trans->bytes_reserved);
                return;
+       }
 
        if (!trans->bytes_reserved)
                return;
 
+       ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
        trace_btrfs_space_reservation(fs_info, "transaction",
                                      trans->transid, trans->bytes_reserved, 0);
        btrfs_block_rsv_release(fs_info, trans->block_rsv,
@@ -5986,104 +6026,37 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
        btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
 }
 
-/**
- * drop_outstanding_extent - drop an outstanding extent
- * @inode: the inode we're dropping the extent for
- * @num_bytes: the number of bytes we're releasing.
- *
- * This is called when we are freeing up an outstanding extent, either called
- * after an error or after an extent is written.  This will return the number of
- * reserved extents that need to be freed.  This must be called with
- * BTRFS_I(inode)->lock held.
- */
-static unsigned drop_outstanding_extent(struct btrfs_inode *inode,
-               u64 num_bytes)
-{
-       unsigned drop_inode_space = 0;
-       unsigned dropped_extents = 0;
-       unsigned num_extents;
-
-       num_extents = count_max_extents(num_bytes);
-       ASSERT(num_extents);
-       ASSERT(inode->outstanding_extents >= num_extents);
-       inode->outstanding_extents -= num_extents;
-
-       if (inode->outstanding_extents == 0 &&
-           test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
-                              &inode->runtime_flags))
-               drop_inode_space = 1;
-
-       /*
-        * If we have more or the same amount of outstanding extents than we have
-        * reserved then we need to leave the reserved extents count alone.
-        */
-       if (inode->outstanding_extents >= inode->reserved_extents)
-               return drop_inode_space;
-
-       dropped_extents = inode->reserved_extents - inode->outstanding_extents;
-       inode->reserved_extents -= dropped_extents;
-       return dropped_extents + drop_inode_space;
-}
-
-/**
- * calc_csum_metadata_size - return the amount of metadata space that must be
- *     reserved/freed for the given bytes.
- * @inode: the inode we're manipulating
- * @num_bytes: the number of bytes in question
- * @reserve: 1 if we are reserving space, 0 if we are freeing space
- *
- * This adjusts the number of csum_bytes in the inode and then returns the
- * correct amount of metadata that must either be reserved or freed.  We
- * calculate how many checksums we can fit into one leaf and then divide the
- * number of bytes that will need to be checksumed by this value to figure out
- * how many checksums will be required.  If we are adding bytes then the number
- * may go up and we will return the number of additional bytes that must be
- * reserved.  If it is going down we will return the number of bytes that must
- * be freed.
- *
- * This must be called with BTRFS_I(inode)->lock held.
- */
-static u64 calc_csum_metadata_size(struct btrfs_inode *inode, u64 num_bytes,
-                                  int reserve)
+static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
+                                                struct btrfs_inode *inode)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
-       u64 old_csums, num_csums;
-
-       if (inode->flags & BTRFS_INODE_NODATASUM && inode->csum_bytes == 0)
-               return 0;
-
-       old_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
-       if (reserve)
-               inode->csum_bytes += num_bytes;
-       else
-               inode->csum_bytes -= num_bytes;
-       num_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
+       struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
+       u64 reserve_size = 0;
+       u64 csum_leaves;
+       unsigned outstanding_extents;
 
-       /* No change, no need to reserve more */
-       if (old_csums == num_csums)
-               return 0;
-
-       if (reserve)
-               return btrfs_calc_trans_metadata_size(fs_info,
-                                                     num_csums - old_csums);
+       lockdep_assert_held(&inode->lock);
+       outstanding_extents = inode->outstanding_extents;
+       if (outstanding_extents)
+               reserve_size = btrfs_calc_trans_metadata_size(fs_info,
+                                               outstanding_extents + 1);
+       csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
+                                                inode->csum_bytes);
+       reserve_size += btrfs_calc_trans_metadata_size(fs_info,
+                                                      csum_leaves);
 
-       return btrfs_calc_trans_metadata_size(fs_info, old_csums - num_csums);
+       spin_lock(&block_rsv->lock);
+       block_rsv->size = reserve_size;
+       spin_unlock(&block_rsv->lock);
 }
 
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
        struct btrfs_root *root = inode->root;
-       struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv;
-       u64 to_reserve = 0;
-       u64 csum_bytes;
        unsigned nr_extents;
        enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
        int ret = 0;
        bool delalloc_lock = true;
-       u64 to_free = 0;
-       unsigned dropped;
-       bool release_extra = false;
 
        /* If we are a free space inode we need to not flush since we will be in
         * the middle of a transaction commit.  We also don't need the delalloc
@@ -6109,19 +6082,12 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 
        num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
 
+       /* Add our new extents and calculate the new rsv size. */
        spin_lock(&inode->lock);
        nr_extents = count_max_extents(num_bytes);
-       inode->outstanding_extents += nr_extents;
-
-       nr_extents = 0;
-       if (inode->outstanding_extents > inode->reserved_extents)
-               nr_extents += inode->outstanding_extents -
-                       inode->reserved_extents;
-
-       /* We always want to reserve a slot for updating the inode. */
-       to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1);
-       to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
-       csum_bytes = inode->csum_bytes;
+       btrfs_mod_outstanding_extents(inode, nr_extents);
+       inode->csum_bytes += num_bytes;
+       btrfs_calculate_inode_block_rsv_size(fs_info, inode);
        spin_unlock(&inode->lock);
 
        if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
@@ -6131,92 +6097,26 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
                        goto out_fail;
        }
 
-       ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush);
+       ret = btrfs_inode_rsv_refill(inode, flush);
        if (unlikely(ret)) {
                btrfs_qgroup_free_meta(root,
                                       nr_extents * fs_info->nodesize);
                goto out_fail;
        }
 
-       spin_lock(&inode->lock);
-       if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
-                            &inode->runtime_flags)) {
-               to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1);
-               release_extra = true;
-       }
-       inode->reserved_extents += nr_extents;
-       spin_unlock(&inode->lock);
-
        if (delalloc_lock)
                mutex_unlock(&inode->delalloc_mutex);
-
-       if (to_reserve)
-               trace_btrfs_space_reservation(fs_info, "delalloc",
-                                             btrfs_ino(inode), to_reserve, 1);
-       if (release_extra)
-               btrfs_block_rsv_release(fs_info, block_rsv,
-                               btrfs_calc_trans_metadata_size(fs_info, 1));
        return 0;
 
 out_fail:
        spin_lock(&inode->lock);
-       dropped = drop_outstanding_extent(inode, num_bytes);
-       /*
-        * If the inodes csum_bytes is the same as the original
-        * csum_bytes then we know we haven't raced with any free()ers
-        * so we can just reduce our inodes csum bytes and carry on.
-        */
-       if (inode->csum_bytes == csum_bytes) {
-               calc_csum_metadata_size(inode, num_bytes, 0);
-       } else {
-               u64 orig_csum_bytes = inode->csum_bytes;
-               u64 bytes;
-
-               /*
-                * This is tricky, but first we need to figure out how much we
-                * freed from any free-ers that occurred during this
-                * reservation, so we reset ->csum_bytes to the csum_bytes
-                * before we dropped our lock, and then call the free for the
-                * number of bytes that were freed while we were trying our
-                * reservation.
-                */
-               bytes = csum_bytes - inode->csum_bytes;
-               inode->csum_bytes = csum_bytes;
-               to_free = calc_csum_metadata_size(inode, bytes, 0);
-
-
-               /*
-                * Now we need to see how much we would have freed had we not
-                * been making this reservation and our ->csum_bytes were not
-                * artificially inflated.
-                */
-               inode->csum_bytes = csum_bytes - num_bytes;
-               bytes = csum_bytes - orig_csum_bytes;
-               bytes = calc_csum_metadata_size(inode, bytes, 0);
-
-               /*
-                * Now reset ->csum_bytes to what it should be.  If bytes is
-                * more than to_free then we would have freed more space had we
-                * not had an artificially high ->csum_bytes, so we need to free
-                * the remainder.  If bytes is the same or less then we don't
-                * need to do anything, the other free-ers did the correct
-                * thing.
-                */
-               inode->csum_bytes = orig_csum_bytes - num_bytes;
-               if (bytes > to_free)
-                       to_free = bytes - to_free;
-               else
-                       to_free = 0;
-       }
+       nr_extents = count_max_extents(num_bytes);
+       btrfs_mod_outstanding_extents(inode, -nr_extents);
+       inode->csum_bytes -= num_bytes;
+       btrfs_calculate_inode_block_rsv_size(fs_info, inode);
        spin_unlock(&inode->lock);
-       if (dropped)
-               to_free += btrfs_calc_trans_metadata_size(fs_info, dropped);
 
-       if (to_free) {
-               btrfs_block_rsv_release(fs_info, block_rsv, to_free);
-               trace_btrfs_space_reservation(fs_info, "delalloc",
-                                             btrfs_ino(inode), to_free, 0);
-       }
+       btrfs_inode_rsv_release(inode);
        if (delalloc_lock)
                mutex_unlock(&inode->delalloc_mutex);
        return ret;
@@ -6224,36 +6124,55 @@ out_fail:
 
 /**
  * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
- * @inode: the inode to release the reservation for
- * @num_bytes: the number of bytes we're releasing
+ * @inode: the inode to release the reservation for.
+ * @num_bytes: the number of bytes we are releasing.
  *
  * This will release the metadata reservation for an inode.  This can be called
  * once we complete IO for a given set of bytes to release their metadata
- * reservations.
+ * reservations, or on error for the same reason.
  */
 void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
-       u64 to_free = 0;
-       unsigned dropped;
 
        num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
        spin_lock(&inode->lock);
-       dropped = drop_outstanding_extent(inode, num_bytes);
-
-       if (num_bytes)
-               to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+       inode->csum_bytes -= num_bytes;
+       btrfs_calculate_inode_block_rsv_size(fs_info, inode);
        spin_unlock(&inode->lock);
-       if (dropped > 0)
-               to_free += btrfs_calc_trans_metadata_size(fs_info, dropped);
 
        if (btrfs_is_testing(fs_info))
                return;
 
-       trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode),
-                                     to_free, 0);
+       btrfs_inode_rsv_release(inode);
+}
 
-       btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free);
+/**
+ * btrfs_delalloc_release_extents - release our outstanding_extents
+ * @inode: the inode to balance the reservation for.
+ * @num_bytes: the number of bytes we originally reserved
+ *
+ * When we reserve space we increase outstanding_extents for the extents we may
+ * add.  Once we've set the range as delalloc or created our ordered extents we
+ * have outstanding_extents to track the real usage, so we use this to free our
+ * temporarily tracked outstanding_extents.  This _must_ be used in conjunction
+ * with btrfs_delalloc_reserve_metadata.
+ */
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
+{
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+       unsigned num_extents;
+
+       spin_lock(&inode->lock);
+       num_extents = count_max_extents(num_bytes);
+       btrfs_mod_outstanding_extents(inode, -num_extents);
+       btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+       spin_unlock(&inode->lock);
+
+       if (btrfs_is_testing(fs_info))
+               return;
+
+       btrfs_inode_rsv_release(inode);
 }
 
 /**
@@ -6300,10 +6219,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
  * @inode: inode we're releasing space for
  * @start: start position of the space already reserved
  * @len: the len of the space already reserved
- *
- * This must be matched with a call to btrfs_delalloc_reserve_space.  This is
- * called in the case that we don't need the metadata AND data reservations
- * anymore.  So if there is an error or we insert an inline extent.
+ * @release_bytes: the length of the space we consumed or didn't use
  *
  * This function will release the metadata space that was not used and will
  * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
@@ -6311,7 +6227,8 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
  * Also it will handle the qgroup reserved space.
  */
 void btrfs_delalloc_release_space(struct inode *inode,
-                       struct extent_changeset *reserved, u64 start, u64 len)
+                                 struct extent_changeset *reserved,
+                                 u64 start, u64 len)
 {
        btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
        btrfs_free_reserved_data_space(inode, reserved, start, len);
@@ -7231,7 +7148,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
                goto out_delayed_unlock;
 
        spin_lock(&head->lock);
-       if (!list_empty(&head->ref_list))
+       if (!RB_EMPTY_ROOT(&head->ref_tree))
                goto out;
 
        if (head->extent_op) {
@@ -7252,9 +7169,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
         * at this point we have a head with no other entries.  Go
         * ahead and process it.
         */
-       head->node.in_tree = 0;
        rb_erase(&head->href_node, &delayed_refs->href_root);
-
+       RB_CLEAR_NODE(&head->href_node);
        atomic_dec(&delayed_refs->num_entries);
 
        /*
@@ -7273,7 +7189,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
                ret = 1;
 
        mutex_unlock(&head->mutex);
-       btrfs_put_delayed_ref(&head->node);
+       btrfs_put_delayed_ref_head(head);
        return ret;
 out:
        spin_unlock(&head->lock);