btrfs: introduce delayed_refs_rsv
[sfrench/cifs-2.6.git] / fs / btrfs / extent-tree.c
index 32a68b6bbeea228f6e29ba3a8185440888ea9387..54dc55b55a4bf77dde99cefe9f99ba205217cfdc 100644 (file)
@@ -2462,6 +2462,7 @@ static void cleanup_ref_head_accounting(struct btrfs_trans_handle *trans,
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_delayed_ref_root *delayed_refs =
                &trans->transaction->delayed_refs;
+       int nr_items = 1;       /* Dropping this ref head update. */
 
        if (head->total_ref_mod < 0) {
                struct btrfs_space_info *space_info;
@@ -2479,16 +2480,24 @@ static void cleanup_ref_head_accounting(struct btrfs_trans_handle *trans,
                                   -head->num_bytes,
                                   BTRFS_TOTAL_BYTES_PINNED_BATCH);
 
+               /*
+                * We had csum deletions accounted for in our delayed refs rsv,
+                * so we need to drop the csum leaves for this update from our
+                * delayed_refs_rsv.
+                */
                if (head->is_data) {
                        spin_lock(&delayed_refs->lock);
                        delayed_refs->pending_csums -= head->num_bytes;
                        spin_unlock(&delayed_refs->lock);
+                       nr_items += btrfs_csum_bytes_to_leaves(fs_info,
+                               head->num_bytes);
                }
        }
 
        /* Also free its reserved qgroup space */
        btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
                                      head->qgroup_reserved);
+       btrfs_delayed_refs_rsv_release(fs_info, nr_items);
 }
 
 static int cleanup_ref_head(struct btrfs_trans_handle *trans,
@@ -3626,6 +3635,8 @@ again:
         */
        mutex_lock(&trans->transaction->cache_write_mutex);
        while (!list_empty(&dirty)) {
+               bool drop_reserve = true;
+
                cache = list_first_entry(&dirty,
                                         struct btrfs_block_group_cache,
                                         dirty_list);
@@ -3698,6 +3709,7 @@ again:
                                        list_add_tail(&cache->dirty_list,
                                                      &cur_trans->dirty_bgs);
                                        btrfs_get_block_group(cache);
+                                       drop_reserve = false;
                                }
                                spin_unlock(&cur_trans->dirty_bgs_lock);
                        } else if (ret) {
@@ -3708,6 +3720,8 @@ again:
                /* if its not on the io list, we need to put the block group */
                if (should_put)
                        btrfs_put_block_group(cache);
+               if (drop_reserve)
+                       btrfs_delayed_refs_rsv_release(fs_info, 1);
 
                if (ret)
                        break;
@@ -3856,6 +3870,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                /* if its not on the io list, we need to put the block group */
                if (should_put)
                        btrfs_put_block_group(cache);
+               btrfs_delayed_refs_rsv_release(fs_info, 1);
                spin_lock(&cur_trans->dirty_bgs_lock);
        }
        spin_unlock(&cur_trans->dirty_bgs_lock);
@@ -5389,6 +5404,90 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
+/**
+ * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
+ * @fs_info: the fs info for our fs.
+ * @src: the source block rsv to transfer from.
+ * @num_bytes: the number of bytes to transfer.
+ *
+ * This transfers up to the num_bytes amount from the src rsv to the
+ * delayed_refs_rsv.  Any extra bytes are returned to the space info.
+ *
+ * @num_bytes is first subtracted from both src->reserved and src->size under
+ * src->lock.  Then, under the delayed_refs_rsv lock, at most the current
+ * shortfall (size - reserved) of the delayed_refs_rsv is added to its
+ * reserved count; if there is no shortfall, nothing is added.  The rsv is
+ * marked full once reserved >= size.
+ *
+ * After the lock is dropped, the absorbed bytes (if any) are reported through
+ * trace_btrfs_space_reservation(), and whatever part of @num_bytes was not
+ * absorbed is handed to space_info_add_old_bytes() for the delayed_refs_rsv's
+ * space_info.
+ *
+ * NOTE(review): src is decremented without an underflow check, so callers
+ * presumably guarantee that src holds at least @num_bytes -- confirm.
+ */
+void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
+                                      struct btrfs_block_rsv *src,
+                                      u64 num_bytes)
+{
+       struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+       u64 to_free = 0;
+
+       spin_lock(&src->lock);
+       src->reserved -= num_bytes;
+       src->size -= num_bytes;
+       spin_unlock(&src->lock);
+
+       spin_lock(&delayed_refs_rsv->lock);
+       if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
+               u64 delta = delayed_refs_rsv->size -
+                       delayed_refs_rsv->reserved;
+               if (num_bytes > delta) {
+                       to_free = num_bytes - delta;
+                       num_bytes = delta;
+               }
+       } else {
+               to_free = num_bytes;
+               num_bytes = 0;
+       }
+
+       if (num_bytes)
+               delayed_refs_rsv->reserved += num_bytes;
+       if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
+               delayed_refs_rsv->full = 1;
+       spin_unlock(&delayed_refs_rsv->lock);
+
+       if (num_bytes)
+               trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
+                                             0, num_bytes, 1);
+       if (to_free)
+               space_info_add_old_bytes(fs_info, delayed_refs_rsv->space_info,
+                                        to_free);
+}
+
+/**
+ * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
+ * @fs_info: the fs_info for our fs.
+ * @flush: control how we can flush for this reservation.
+ *
+ * This will refill the delayed refs block_rsv by at most one item's worth of
+ * space (btrfs_calc_trans_metadata_size(fs_info, 1)), capped at the rsv's
+ * current shortfall (size - reserved).
+ *
+ * The shortfall is sampled under the rsv lock, but the reservation itself is
+ * made after the lock has been dropped.
+ *
+ * Returns 0 if the rsv had no shortfall or the reservation succeeded,
+ * otherwise the error returned by reserve_metadata_bytes() (expected to be
+ * -ENOSPC if we can't make the reservation).
+ */
+int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
+                                 enum btrfs_reserve_flush_enum flush)
+{
+       struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
+       u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
+       u64 num_bytes = 0;
+       int ret = -ENOSPC;
+
+       spin_lock(&block_rsv->lock);
+       if (block_rsv->reserved < block_rsv->size) {
+               num_bytes = block_rsv->size - block_rsv->reserved;
+               num_bytes = min(num_bytes, limit);
+       }
+       spin_unlock(&block_rsv->lock);
+
+       if (!num_bytes)
+               return 0;
+
+       ret = reserve_metadata_bytes(fs_info->extent_root, block_rsv,
+                                    num_bytes, flush);
+       if (ret)
+               return ret;
+       block_rsv_add_bytes(block_rsv, num_bytes, 0);
+       trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
+                                     0, num_bytes, 1);
+       return 0;
+}
+
 /*
  * This is for space we already have accounted in space_info->bytes_may_use, so
  * basically when we're returning space from block_rsv's.
@@ -5709,6 +5808,31 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
        return ret;
 }
 
+static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
+                                    struct btrfs_block_rsv *block_rsv,
+                                    u64 num_bytes, u64 *qgroup_to_release)
+{
+       struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+       struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
+       struct btrfs_block_rsv *target = delayed_rsv;
+       /*
+        * Releases num_bytes from block_rsv through block_rsv_release_bytes()
+        * and returns its result.  The rsv handed over as the destination is
+        * chosen here: the delayed refs rsv by default, the global rsv when
+        * the delayed refs rsv is marked full or is itself the rsv being
+        * released, and none (NULL) when the chosen rsv belongs to a different
+        * space_info than block_rsv.
+        *
+        * NOTE(review): when block_rsv is the global rsv and the delayed refs
+        * rsv is full, the destination is block_rsv itself -- confirm that
+        * block_rsv_release_bytes() copes with that.
+        */
+       if (target->full || target == block_rsv)
+               target = global_rsv;
+
+       if (block_rsv->space_info != target->space_info)
+               target = NULL;
+
+       return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
+                                      qgroup_to_release);
+}
+/*
+ * btrfs_block_rsv_release - release num_bytes from block_rsv.
+ *
+ * Thin wrapper around __btrfs_block_rsv_release(): it passes NULL for
+ * qgroup_to_release and ignores the returned value.
+ */
+void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
+                            struct btrfs_block_rsv *block_rsv,
+                            u64 num_bytes)
+{
+       __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
+}
+
 /**
  * btrfs_inode_rsv_release - release any excessive reservation.
  * @inode - the inode we need to release from.
@@ -5723,7 +5847,6 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
 static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
-       struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
        struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
        u64 released = 0;
        u64 qgroup_to_release = 0;
@@ -5733,8 +5856,8 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
         * are releasing 0 bytes, and then we'll just get the reservation over
         * the size free'd.
         */
-       released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
-                                          &qgroup_to_release);
+       released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
+                                            &qgroup_to_release);
        if (released > 0)
                trace_btrfs_space_reservation(fs_info, "delalloc",
                                              btrfs_ino(inode), released, 0);
@@ -5745,16 +5868,26 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
                                                   qgroup_to_release);
 }
 
-void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
-                            struct btrfs_block_rsv *block_rsv,
-                            u64 num_bytes)
+/**
+ * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
+ * @fs_info: the fs_info for our fs.
+ * @nr: the number of items to drop.
+ *
+ * This drops the delayed ref head's count from the delayed refs rsv and frees
+ * any excess reservation we had.
+ *
+ * @nr is converted to bytes with btrfs_calc_trans_metadata_size() and that
+ * amount is released from fs_info->delayed_refs_rsv via
+ * block_rsv_release_bytes(), with the global block rsv passed as the
+ * destination rsv.  A non-zero return value from that call is reported
+ * through trace_btrfs_space_reservation().
+ */
+void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
 {
+       struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
        struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+       u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
+       u64 released = 0;
 
-       if (global_rsv == block_rsv ||
-           block_rsv->space_info != global_rsv->space_info)
-               global_rsv = NULL;
-       block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL);
+       released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv,
+                                          num_bytes, NULL);
+       if (released)
+               trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
+                                             0, released, 0);
 }
 
 static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
@@ -5819,9 +5952,10 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
        fs_info->trans_block_rsv.space_info = space_info;
        fs_info->empty_block_rsv.space_info = space_info;
        fs_info->delayed_block_rsv.space_info = space_info;
+       fs_info->delayed_refs_rsv.space_info = space_info;
 
-       fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
-       fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
+       fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
+       fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
        fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
        fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
        if (fs_info->quota_root)
@@ -5841,8 +5975,34 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
        WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
        WARN_ON(fs_info->delayed_block_rsv.size > 0);
        WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
+       WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
+       WARN_ON(fs_info->delayed_refs_rsv.size > 0);
 }
 
+/**
+ * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
+ * @trans: the trans that may have generated delayed refs
+ *
+ * This is to be called anytime we may have adjusted
+ * trans->delayed_ref_updates.  It calculates the additional size with
+ * btrfs_calc_trans_metadata_size() and adds it to the delayed_refs_rsv.
+ *
+ * Does nothing when trans->delayed_ref_updates is zero.  Otherwise only the
+ * rsv's size is grown here (its reserved count is left untouched), its full
+ * flag is cleared, and trans->delayed_ref_updates is reset to 0 so the same
+ * updates are not added again by a later call.
+ */
+void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
+{
+       struct btrfs_fs_info *fs_info = trans->fs_info;
+       struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
+       u64 num_bytes;
+
+       if (!trans->delayed_ref_updates)
+               return;
+
+       num_bytes = btrfs_calc_trans_metadata_size(fs_info,
+                                                  trans->delayed_ref_updates);
+       spin_lock(&delayed_rsv->lock);
+       delayed_rsv->size += num_bytes;
+       delayed_rsv->full = 0;
+       spin_unlock(&delayed_rsv->lock);
+       trans->delayed_ref_updates = 0;
+}
 
 /*
  * To be called after all the new block groups attached to the transaction
@@ -6135,6 +6295,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
        u64 old_val;
        u64 byte_in_group;
        int factor;
+       int ret = 0;
 
        /* block accounting for super block */
        spin_lock(&info->delalloc_root_lock);
@@ -6148,8 +6309,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 
        while (total) {
                cache = btrfs_lookup_block_group(info, bytenr);
-               if (!cache)
-                       return -ENOENT;
+               if (!cache) {
+                       ret = -ENOENT;
+                       break;
+               }
                factor = btrfs_bg_type_to_factor(cache->flags);
 
                /*
@@ -6208,6 +6371,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                        list_add_tail(&cache->dirty_list,
                                      &trans->transaction->dirty_bgs);
                        trans->transaction->num_dirty_bgs++;
+                       trans->delayed_ref_updates++;
                        btrfs_get_block_group(cache);
                }
                spin_unlock(&trans->transaction->dirty_bgs_lock);
@@ -6225,7 +6389,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                total -= num_bytes;
                bytenr += num_bytes;
        }
-       return 0;
+
+       /* Modified block groups are accounted for in the delayed_refs_rsv. */
+       btrfs_update_delayed_refs_rsv(trans);
+       return ret;
 }
 
 static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
@@ -8371,7 +8538,12 @@ again:
                goto again;
        }
 
-       if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
+       /*
+        * The global reserve still exists to save us from ourselves, so don't
+        * warn_on if we are short on our delayed refs reserve.
+        */
+       if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
+           btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
                static DEFINE_RATELIMIT_STATE(_rs,
                                DEFAULT_RATELIMIT_INTERVAL * 10,
                                /*DEFAULT_RATELIMIT_BURST*/ 1);
@@ -10304,6 +10476,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
                add_block_group_free_space(trans, block_group);
                /* already aborted the transaction if it failed. */
 next:
+               btrfs_delayed_refs_rsv_release(fs_info, 1);
                list_del_init(&block_group->bg_list);
        }
        btrfs_trans_release_chunk_metadata(trans);
@@ -10381,6 +10554,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
        link_block_group(cache);
 
        list_add_tail(&cache->bg_list, &trans->new_bgs);
+       trans->delayed_ref_updates++;
+       btrfs_update_delayed_refs_rsv(trans);
 
        set_avail_alloc_bits(fs_info, type);
        return 0;
@@ -10418,6 +10593,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        int factor;
        struct btrfs_caching_control *caching_ctl = NULL;
        bool remove_em;
+       bool remove_rsv = false;
 
        block_group = btrfs_lookup_block_group(fs_info, group_start);
        BUG_ON(!block_group);
@@ -10482,6 +10658,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
        if (!list_empty(&block_group->dirty_list)) {
                list_del_init(&block_group->dirty_list);
+               remove_rsv = true;
                btrfs_put_block_group(block_group);
        }
        spin_unlock(&trans->transaction->dirty_bgs_lock);
@@ -10691,6 +10868,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
        ret = btrfs_del_item(trans, root, path);
 out:
+       if (remove_rsv)
+               btrfs_delayed_refs_rsv_release(fs_info, 1);
        btrfs_free_path(path);
        return ret;
 }