btrfs: Make btrfs_async_run_delayed_root use a loop rather than multiple labels

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5d73f79ded8bcbd967636b652d98433da64cceb8..cf0a6a25156c48bbc88cd166ba5a86affa2871c6 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -87,6 +87,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 
        spin_lock(&root->inode_lock);
        node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
+
        if (node) {
                if (btrfs_inode->delayed_node) {
                        refcount_inc(&node->refs);      /* can be accessed */
@@ -94,9 +95,30 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
                        spin_unlock(&root->inode_lock);
                        return node;
                }
-               btrfs_inode->delayed_node = node;
-               /* can be accessed and cached in the inode */
-               refcount_add(2, &node->refs);
+
+               /*
+                * It's possible that we're racing into the middle of removing
+                * this node from the radix tree.  In this case, the refcount
+                * was zero and it should never go back to one.  Just return
+                * NULL like it was never in the radix at all; our release
+                * function is in the process of removing it.
+                *
+                * Some implementations of refcount_inc refuse to bump the
+                * refcount once it has hit zero.  If we don't do this dance
+                * here, refcount_inc() may decide to just WARN_ONCE() instead
+                * of actually bumping the refcount.
+                *
+                * If this node is properly in the radix, we want to bump the
+                * refcount twice, once for the inode and once for this get
+                * operation.
+                */
+               if (refcount_inc_not_zero(&node->refs)) {
+                       refcount_inc(&node->refs);
+                       btrfs_inode->delayed_node = node;
+               } else {
+                       node = NULL;
+               }
+
                spin_unlock(&root->inode_lock);
                return node;
        }
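
As an illustrative aside (not part of the patch; struct obj, obj_lookup(), and the tree/lock parameters below are hypothetical stand-ins), the hunk above is an instance of the usual lookup-under-lock pattern: only take a reference when refcount_inc_not_zero() confirms the object has not already dropped to zero.

#include <linux/radix-tree.h>
#include <linux/refcount.h>
#include <linux/spinlock.h>

struct obj {
	refcount_t refs;
	/* ... payload ... */
};

/* Return a referenced object, or NULL if it is being torn down. */
static struct obj *obj_lookup(struct radix_tree_root *tree,
			      spinlock_t *lock, unsigned long index)
{
	struct obj *o;

	spin_lock(lock);
	o = radix_tree_lookup(tree, index);
	/*
	 * Some refcount_inc() implementations refuse to bump a count
	 * that has already hit zero and may just WARN instead;
	 * refcount_inc_not_zero() fails cleanly, so a lookup that
	 * races with the final put simply reports "not found".
	 */
	if (o && !refcount_inc_not_zero(&o->refs))
		o = NULL;
	spin_unlock(lock);

	return o;
}
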
@@ -254,17 +276,18 @@ static void __btrfs_release_delayed_node(
        mutex_unlock(&delayed_node->mutex);
 
        if (refcount_dec_and_test(&delayed_node->refs)) {
-               bool free = false;
                struct btrfs_root *root = delayed_node->root;
+
                spin_lock(&root->inode_lock);
-               if (refcount_read(&delayed_node->refs) == 0) {
-                       radix_tree_delete(&root->delayed_nodes_tree,
-                                         delayed_node->inode_id);
-                       free = true;
-               }
+               /*
+                * Once our refcount goes to zero, nobody is allowed to bump it
+                * back up.  We can delete it now.
+                */
+               ASSERT(refcount_read(&delayed_node->refs) == 0);
+               radix_tree_delete(&root->delayed_nodes_tree,
+                                 delayed_node->inode_id);
                spin_unlock(&root->inode_lock);
-               if (free)
-                       kmem_cache_free(delayed_node_cache, delayed_node);
+               kmem_cache_free(delayed_node_cache, delayed_node);
        }
 }
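
The matching release side of that sketch (same hypothetical names, reusing struct obj from above) shows why the hunk above can assert instead of re-checking: with refcount_inc_not_zero() on the lookup side, a refcount that reaches zero can never be raised again.

#include <linux/slab.h>

/* Drop a reference; the last holder removes and frees the object. */
static void obj_put(struct radix_tree_root *tree, spinlock_t *lock,
		    struct obj *o, unsigned long index)
{
	if (!refcount_dec_and_test(&o->refs))
		return;

	spin_lock(lock);
	/*
	 * Lookups use refcount_inc_not_zero(), so once the count hits
	 * zero nobody can raise it again; the entry can be deleted and
	 * freed unconditionally.
	 */
	WARN_ON(refcount_read(&o->refs) != 0);
	radix_tree_delete(tree, index);
	spin_unlock(lock);

	kfree(o);
}
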
 
@@ -1279,40 +1302,42 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
        if (!path)
                goto out;
 
-again:
-       if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
-               goto free_path;
+       do {
+               if (atomic_read(&delayed_root->items) <
+                   BTRFS_DELAYED_BACKGROUND / 2)
+                       break;
 
-       delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
-       if (!delayed_node)
-               goto free_path;
+               delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
+               if (!delayed_node)
+                       break;
 
-       path->leave_spinning = 1;
-       root = delayed_node->root;
+               path->leave_spinning = 1;
+               root = delayed_node->root;
 
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans))
-               goto release_path;
+               trans = btrfs_join_transaction(root);
+               if (IS_ERR(trans)) {
+                       btrfs_release_path(path);
+                       btrfs_release_prepared_delayed_node(delayed_node);
+                       total_done++;
+                       continue;
+               }
 
-       block_rsv = trans->block_rsv;
-       trans->block_rsv = &root->fs_info->delayed_block_rsv;
+               block_rsv = trans->block_rsv;
+               trans->block_rsv = &root->fs_info->delayed_block_rsv;
 
-       __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
+               __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
 
-       trans->block_rsv = block_rsv;
-       btrfs_end_transaction(trans);
-       btrfs_btree_balance_dirty_nodelay(root->fs_info);
+               trans->block_rsv = block_rsv;
+               btrfs_end_transaction(trans);
+               btrfs_btree_balance_dirty_nodelay(root->fs_info);
 
-release_path:
-       btrfs_release_path(path);
-       total_done++;
+               btrfs_release_path(path);
+               btrfs_release_prepared_delayed_node(delayed_node);
+               total_done++;
 
-       btrfs_release_prepared_delayed_node(delayed_node);
-       if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
-           total_done < async_work->nr)
-               goto again;
+       } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
+                || total_done < async_work->nr);
 
-free_path:
        btrfs_free_path(path);
 out:
        wake_up(&delayed_root->wait);
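
For completeness, a schematic of the control-flow change in the final hunk, using hypothetical helpers rather than the real btrfs functions: the again:/release_path:/free_path: labels become a single do/while loop in which the error path continues, the exit conditions break, and the per-item cleanup happens in one place.

struct item;

struct item *grab_next_item(void);	/* cf. btrfs_first_prepared_delayed_node() */
int start_work(struct item *it);	/* cf. btrfs_join_transaction() */
void do_main_step(struct item *it);	/* cf. __btrfs_commit_inode_delayed_items() */
void release_item(struct item *it);	/* cf. btrfs_release_prepared_delayed_node() */

static void run_pending(int nr, int writeback_limit)
{
	int total_done = 0;

	do {
		struct item *it = grab_next_item();

		if (!it)
			break;

		if (start_work(it) < 0) {
			/* Error path: skip the main step, but still
			 * release the item, count it, and loop. */
			release_item(it);
			total_done++;
			continue;
		}

		do_main_step(it);

		release_item(it);
		total_done++;
	} while ((nr == 0 && total_done < writeback_limit) ||
		 total_done < nr);
}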