Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
[sfrench/cifs-2.6.git] / fs / btrfs / extent-tree.c
index b232150b5b6b7500b337337ad07bd03e66f3e407..2ad813674d77e3cb119fa5049c6e5b7407832645 100644 (file)
@@ -467,13 +467,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
                             struct btrfs_root *root,
                             int load_cache_only)
 {
+       DEFINE_WAIT(wait);
        struct btrfs_fs_info *fs_info = cache->fs_info;
        struct btrfs_caching_control *caching_ctl;
        int ret = 0;
 
-       smp_mb();
-       if (cache->cached != BTRFS_CACHE_NO)
+       caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
+       BUG_ON(!caching_ctl);
+
+       INIT_LIST_HEAD(&caching_ctl->list);
+       mutex_init(&caching_ctl->mutex);
+       init_waitqueue_head(&caching_ctl->wait);
+       caching_ctl->block_group = cache;
+       caching_ctl->progress = cache->key.objectid;
+       atomic_set(&caching_ctl->count, 1);
+       caching_ctl->work.func = caching_thread;
+
+       spin_lock(&cache->lock);
+       /*
+        * This should be a rare occasion, but this could happen I think in the
+        * case where one thread starts to load the space cache info, and then
+        * some other thread starts a transaction commit which tries to do an
+        * allocation while the other thread is still loading the space cache
+        * info.  The previous loop should have kept us from choosing this block
+        * group, but if we've moved to the state where we will wait on caching
+        * block groups we need to first check if we're doing a fast load here,
+        * so we can wait for it to finish, otherwise we could end up allocating
+        * from a block group who's cache gets evicted for one reason or
+        * another.
+        */
+       while (cache->cached == BTRFS_CACHE_FAST) {
+               struct btrfs_caching_control *ctl;
+
+               ctl = cache->caching_ctl;
+               atomic_inc(&ctl->count);
+               prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
+               spin_unlock(&cache->lock);
+
+               schedule();
+
+               finish_wait(&ctl->wait, &wait);
+               put_caching_control(ctl);
+               spin_lock(&cache->lock);
+       }
+
+       if (cache->cached != BTRFS_CACHE_NO) {
+               spin_unlock(&cache->lock);
+               kfree(caching_ctl);
                return 0;
+       }
+       WARN_ON(cache->caching_ctl);
+       cache->caching_ctl = caching_ctl;
+       cache->cached = BTRFS_CACHE_FAST;
+       spin_unlock(&cache->lock);
 
        /*
         * We can't do the read from on-disk cache during a commit since we need
@@ -484,56 +530,51 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
        if (trans && (!trans->transaction->in_commit) &&
            (root && root != root->fs_info->tree_root) &&
            btrfs_test_opt(root, SPACE_CACHE)) {
-               spin_lock(&cache->lock);
-               if (cache->cached != BTRFS_CACHE_NO) {
-                       spin_unlock(&cache->lock);
-                       return 0;
-               }
-               cache->cached = BTRFS_CACHE_STARTED;
-               spin_unlock(&cache->lock);
-
                ret = load_free_space_cache(fs_info, cache);
 
                spin_lock(&cache->lock);
                if (ret == 1) {
+                       cache->caching_ctl = NULL;
                        cache->cached = BTRFS_CACHE_FINISHED;
                        cache->last_byte_to_unpin = (u64)-1;
                } else {
-                       cache->cached = BTRFS_CACHE_NO;
+                       if (load_cache_only) {
+                               cache->caching_ctl = NULL;
+                               cache->cached = BTRFS_CACHE_NO;
+                       } else {
+                               cache->cached = BTRFS_CACHE_STARTED;
+                       }
                }
                spin_unlock(&cache->lock);
+               wake_up(&caching_ctl->wait);
                if (ret == 1) {
+                       put_caching_control(caching_ctl);
                        free_excluded_extents(fs_info->extent_root, cache);
                        return 0;
                }
+       } else {
+               /*
+                * We are not going to do the fast caching, set cached to the
+                * appropriate value and wakeup any waiters.
+                */
+               spin_lock(&cache->lock);
+               if (load_cache_only) {
+                       cache->caching_ctl = NULL;
+                       cache->cached = BTRFS_CACHE_NO;
+               } else {
+                       cache->cached = BTRFS_CACHE_STARTED;
+               }
+               spin_unlock(&cache->lock);
+               wake_up(&caching_ctl->wait);
        }
 
-       if (load_cache_only)
-               return 0;
-
-       caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
-       BUG_ON(!caching_ctl);
-
-       INIT_LIST_HEAD(&caching_ctl->list);
-       mutex_init(&caching_ctl->mutex);
-       init_waitqueue_head(&caching_ctl->wait);
-       caching_ctl->block_group = cache;
-       caching_ctl->progress = cache->key.objectid;
-       /* one for caching kthread, one for caching block group list */
-       atomic_set(&caching_ctl->count, 2);
-       caching_ctl->work.func = caching_thread;
-
-       spin_lock(&cache->lock);
-       if (cache->cached != BTRFS_CACHE_NO) {
-               spin_unlock(&cache->lock);
-               kfree(caching_ctl);
+       if (load_cache_only) {
+               put_caching_control(caching_ctl);
                return 0;
        }
-       cache->caching_ctl = caching_ctl;
-       cache->cached = BTRFS_CACHE_STARTED;
-       spin_unlock(&cache->lock);
 
        down_write(&fs_info->extent_commit_sem);
+       atomic_inc(&caching_ctl->count);
        list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
        up_write(&fs_info->extent_commit_sem);
 
@@ -3847,9 +3888,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root,
        return ret;
 }
 
-int btrfs_block_rsv_refill(struct btrfs_root *root,
-                         struct btrfs_block_rsv *block_rsv,
-                         u64 min_reserved)
+static inline int __btrfs_block_rsv_refill(struct btrfs_root *root,
+                                          struct btrfs_block_rsv *block_rsv,
+                                          u64 min_reserved, int flush)
 {
        u64 num_bytes = 0;
        int ret = -ENOSPC;
@@ -3868,7 +3909,7 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
        if (!ret)
                return 0;
 
-       ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
+       ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
        if (!ret) {
                block_rsv_add_bytes(block_rsv, num_bytes, 0);
                return 0;
@@ -3877,6 +3918,20 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
        return ret;
 }
 
+int btrfs_block_rsv_refill(struct btrfs_root *root,
+                          struct btrfs_block_rsv *block_rsv,
+                          u64 min_reserved)
+{
+       return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1);
+}
+
+int btrfs_block_rsv_refill_noflush(struct btrfs_root *root,
+                                  struct btrfs_block_rsv *block_rsv,
+                                  u64 min_reserved)
+{
+       return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0);
+}
+
 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
                            struct btrfs_block_rsv *dst_rsv,
                            u64 num_bytes)
@@ -5052,11 +5107,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        struct btrfs_root *root = orig_root->fs_info->extent_root;
        struct btrfs_free_cluster *last_ptr = NULL;
        struct btrfs_block_group_cache *block_group = NULL;
+       struct btrfs_block_group_cache *used_block_group;
        int empty_cluster = 2 * 1024 * 1024;
        int allowed_chunk_alloc = 0;
        int done_chunk_alloc = 0;
        struct btrfs_space_info *space_info;
-       int last_ptr_loop = 0;
        int loop = 0;
        int index = 0;
        int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
@@ -5118,6 +5173,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 ideal_cache:
                block_group = btrfs_lookup_block_group(root->fs_info,
                                                       search_start);
+               used_block_group = block_group;
                /*
                 * we don't want to use the block group if it doesn't match our
                 * allocation bits, or if its not cached.
@@ -5155,6 +5211,7 @@ search:
                u64 offset;
                int cached;
 
+               used_block_group = block_group;
                btrfs_get_block_group(block_group);
                search_start = block_group->key.objectid;
 
@@ -5178,13 +5235,15 @@ search:
                }
 
 have_block_group:
-               if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+               cached = block_group_cache_done(block_group);
+               if (unlikely(!cached)) {
                        u64 free_percent;
 
+                       found_uncached_bg = true;
                        ret = cache_block_group(block_group, trans,
                                                orig_root, 1);
                        if (block_group->cached == BTRFS_CACHE_FINISHED)
-                               goto have_block_group;
+                               goto alloc;
 
                        free_percent = btrfs_block_group_used(&block_group->item);
                        free_percent *= 100;
@@ -5206,7 +5265,6 @@ have_block_group:
                                                        orig_root, 0);
                                BUG_ON(ret);
                        }
-                       found_uncached_bg = true;
 
                        /*
                         * If loop is set for cached only, try the next block
@@ -5216,94 +5274,80 @@ have_block_group:
                                goto loop;
                }
 
-               cached = block_group_cache_done(block_group);
-               if (unlikely(!cached))
-                       found_uncached_bg = true;
-
+alloc:
                if (unlikely(block_group->ro))
                        goto loop;
 
                spin_lock(&block_group->free_space_ctl->tree_lock);
                if (cached &&
                    block_group->free_space_ctl->free_space <
-                   num_bytes + empty_size) {
+                   num_bytes + empty_cluster + empty_size) {
                        spin_unlock(&block_group->free_space_ctl->tree_lock);
                        goto loop;
                }
                spin_unlock(&block_group->free_space_ctl->tree_lock);
 
                /*
-                * Ok we want to try and use the cluster allocator, so lets look
-                * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
-                * have tried the cluster allocator plenty of times at this
-                * point and not have found anything, so we are likely way too
-                * fragmented for the clustering stuff to find anything, so lets
-                * just skip it and let the allocator find whatever block it can
-                * find
+                * Ok we want to try and use the cluster allocator, so
+                * lets look there
                 */
-               if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) {
+               if (last_ptr) {
                        /*
                         * the refill lock keeps out other
                         * people trying to start a new cluster
                         */
                        spin_lock(&last_ptr->refill_lock);
-                       if (last_ptr->block_group &&
-                           (last_ptr->block_group->ro ||
-                           !block_group_bits(last_ptr->block_group, data))) {
-                               offset = 0;
+                       used_block_group = last_ptr->block_group;
+                       if (used_block_group != block_group &&
+                           (!used_block_group ||
+                            used_block_group->ro ||
+                            !block_group_bits(used_block_group, data))) {
+                               used_block_group = block_group;
                                goto refill_cluster;
                        }
 
-                       offset = btrfs_alloc_from_cluster(block_group, last_ptr,
-                                                num_bytes, search_start);
+                       if (used_block_group != block_group)
+                               btrfs_get_block_group(used_block_group);
+
+                       offset = btrfs_alloc_from_cluster(used_block_group,
+                         last_ptr, num_bytes, used_block_group->key.objectid);
                        if (offset) {
                                /* we have a block, we're done */
                                spin_unlock(&last_ptr->refill_lock);
                                goto checks;
                        }
 
-                       spin_lock(&last_ptr->lock);
-                       /*
-                        * whoops, this cluster doesn't actually point to
-                        * this block group.  Get a ref on the block
-                        * group is does point to and try again
-                        */
-                       if (!last_ptr_loop && last_ptr->block_group &&
-                           last_ptr->block_group != block_group &&
-                           index <=
-                                get_block_group_index(last_ptr->block_group)) {
-
-                               btrfs_put_block_group(block_group);
-                               block_group = last_ptr->block_group;
-                               btrfs_get_block_group(block_group);
-                               spin_unlock(&last_ptr->lock);
-                               spin_unlock(&last_ptr->refill_lock);
-
-                               last_ptr_loop = 1;
-                               search_start = block_group->key.objectid;
-                               /*
-                                * we know this block group is properly
-                                * in the list because
-                                * btrfs_remove_block_group, drops the
-                                * cluster before it removes the block
-                                * group from the list
-                                */
-                               goto have_block_group;
+                       WARN_ON(last_ptr->block_group != used_block_group);
+                       if (used_block_group != block_group) {
+                               btrfs_put_block_group(used_block_group);
+                               used_block_group = block_group;
                        }
-                       spin_unlock(&last_ptr->lock);
 refill_cluster:
+                       BUG_ON(used_block_group != block_group);
+                       /* If we are on LOOP_NO_EMPTY_SIZE, we can't
+                        * set up a new clusters, so lets just skip it
+                        * and let the allocator find whatever block
+                        * it can find.  If we reach this point, we
+                        * will have tried the cluster allocator
+                        * plenty of times and not have found
+                        * anything, so we are likely way too
+                        * fragmented for the clustering stuff to find
+                        * anything.  */
+                       if (loop >= LOOP_NO_EMPTY_SIZE) {
+                               spin_unlock(&last_ptr->refill_lock);
+                               goto unclustered_alloc;
+                       }
+
                        /*
                         * this cluster didn't work out, free it and
                         * start over
                         */
                        btrfs_return_cluster_to_free_space(NULL, last_ptr);
 
-                       last_ptr_loop = 0;
-
                        /* allocate a cluster in this block group */
                        ret = btrfs_find_space_cluster(trans, root,
                                               block_group, last_ptr,
-                                              offset, num_bytes,
+                                              search_start, num_bytes,
                                               empty_cluster + empty_size);
                        if (ret == 0) {
                                /*
@@ -5339,6 +5383,7 @@ refill_cluster:
                        goto loop;
                }
 
+unclustered_alloc:
                offset = btrfs_find_space_for_alloc(block_group, search_start,
                                                    num_bytes, empty_size);
                /*
@@ -5365,14 +5410,14 @@ checks:
                search_start = stripe_align(root, offset);
                /* move on to the next group */
                if (search_start + num_bytes >= search_end) {
-                       btrfs_add_free_space(block_group, offset, num_bytes);
+                       btrfs_add_free_space(used_block_group, offset, num_bytes);
                        goto loop;
                }
 
                /* move on to the next group */
                if (search_start + num_bytes >
-                   block_group->key.objectid + block_group->key.offset) {
-                       btrfs_add_free_space(block_group, offset, num_bytes);
+                   used_block_group->key.objectid + used_block_group->key.offset) {
+                       btrfs_add_free_space(used_block_group, offset, num_bytes);
                        goto loop;
                }
 
@@ -5380,14 +5425,14 @@ checks:
                ins->offset = num_bytes;
 
                if (offset < search_start)
-                       btrfs_add_free_space(block_group, offset,
+                       btrfs_add_free_space(used_block_group, offset,
                                             search_start - offset);
                BUG_ON(offset > search_start);
 
-               ret = btrfs_update_reserved_bytes(block_group, num_bytes,
+               ret = btrfs_update_reserved_bytes(used_block_group, num_bytes,
                                                  alloc_type);
                if (ret == -EAGAIN) {
-                       btrfs_add_free_space(block_group, offset, num_bytes);
+                       btrfs_add_free_space(used_block_group, offset, num_bytes);
                        goto loop;
                }
 
@@ -5396,15 +5441,19 @@ checks:
                ins->offset = num_bytes;
 
                if (offset < search_start)
-                       btrfs_add_free_space(block_group, offset,
+                       btrfs_add_free_space(used_block_group, offset,
                                             search_start - offset);
                BUG_ON(offset > search_start);
+               if (used_block_group != block_group)
+                       btrfs_put_block_group(used_block_group);
                btrfs_put_block_group(block_group);
                break;
 loop:
                failed_cluster_refill = false;
                failed_alloc = false;
                BUG_ON(index != get_block_group_index(block_group));
+               if (used_block_group != block_group)
+                       btrfs_put_block_group(used_block_group);
                btrfs_put_block_group(block_group);
        }
        up_read(&space_info->groups_sem);