btrfs: Make btrfs_async_run_delayed_root use a loop rather than multiple labels
[sfrench/cifs-2.6.git] / fs / btrfs / delayed-inode.c
1 /*
2  * Copyright (C) 2011 Fujitsu.  All rights reserved.
3  * Written by Miao Xie <miaox@cn.fujitsu.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public
7  * License v2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public
15  * License along with this program; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 021110-1307, USA.
18  */
19
20 #include <linux/slab.h>
21 #include "delayed-inode.h"
22 #include "disk-io.h"
23 #include "transaction.h"
24 #include "ctree.h"
25
26 #define BTRFS_DELAYED_WRITEBACK         512
27 #define BTRFS_DELAYED_BACKGROUND        128
28 #define BTRFS_DELAYED_BATCH             16
29
30 static struct kmem_cache *delayed_node_cache;
31
32 int __init btrfs_delayed_inode_init(void)
33 {
34         delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
35                                         sizeof(struct btrfs_delayed_node),
36                                         0,
37                                         SLAB_MEM_SPREAD,
38                                         NULL);
39         if (!delayed_node_cache)
40                 return -ENOMEM;
41         return 0;
42 }
43
44 void btrfs_delayed_inode_exit(void)
45 {
46         kmem_cache_destroy(delayed_node_cache);
47 }
48
49 static inline void btrfs_init_delayed_node(
50                                 struct btrfs_delayed_node *delayed_node,
51                                 struct btrfs_root *root, u64 inode_id)
52 {
53         delayed_node->root = root;
54         delayed_node->inode_id = inode_id;
55         refcount_set(&delayed_node->refs, 0);
56         delayed_node->ins_root = RB_ROOT;
57         delayed_node->del_root = RB_ROOT;
58         mutex_init(&delayed_node->mutex);
59         INIT_LIST_HEAD(&delayed_node->n_list);
60         INIT_LIST_HEAD(&delayed_node->p_list);
61 }
62
63 static inline int btrfs_is_continuous_delayed_item(
64                                         struct btrfs_delayed_item *item1,
65                                         struct btrfs_delayed_item *item2)
66 {
67         if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
68             item1->key.objectid == item2->key.objectid &&
69             item1->key.type == item2->key.type &&
70             item1->key.offset + 1 == item2->key.offset)
71                 return 1;
72         return 0;
73 }
74
75 static struct btrfs_delayed_node *btrfs_get_delayed_node(
76                 struct btrfs_inode *btrfs_inode)
77 {
78         struct btrfs_root *root = btrfs_inode->root;
79         u64 ino = btrfs_ino(btrfs_inode);
80         struct btrfs_delayed_node *node;
81
82         node = READ_ONCE(btrfs_inode->delayed_node);
83         if (node) {
84                 refcount_inc(&node->refs);
85                 return node;
86         }
87
88         spin_lock(&root->inode_lock);
89         node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
90
91         if (node) {
92                 if (btrfs_inode->delayed_node) {
93                         refcount_inc(&node->refs);      /* can be accessed */
94                         BUG_ON(btrfs_inode->delayed_node != node);
95                         spin_unlock(&root->inode_lock);
96                         return node;
97                 }
98
99                 /*
100                  * It's possible that we're racing into the middle of removing
101                  * this node from the radix tree.  In this case, the refcount
102                  * was zero and it should never go back to one.  Just return
103                  * NULL like it was never in the radix at all; our release
104                  * function is in the process of removing it.
105                  *
106                  * Some implementations of refcount_inc refuse to bump the
107                  * refcount once it has hit zero.  If we don't do this dance
108                  * here, refcount_inc() may decide to just WARN_ONCE() instead
109                  * of actually bumping the refcount.
110                  *
111                  * If this node is properly in the radix, we want to bump the
112                  * refcount twice, once for the inode and once for this get
113                  * operation.
114                  */
115                 if (refcount_inc_not_zero(&node->refs)) {
116                         refcount_inc(&node->refs);
117                         btrfs_inode->delayed_node = node;
118                 } else {
119                         node = NULL;
120                 }
121
122                 spin_unlock(&root->inode_lock);
123                 return node;
124         }
125         spin_unlock(&root->inode_lock);
126
127         return NULL;
128 }
129
130 /* Will return either the node or PTR_ERR(-ENOMEM) */
131 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
132                 struct btrfs_inode *btrfs_inode)
133 {
134         struct btrfs_delayed_node *node;
135         struct btrfs_root *root = btrfs_inode->root;
136         u64 ino = btrfs_ino(btrfs_inode);
137         int ret;
138
139 again:
140         node = btrfs_get_delayed_node(btrfs_inode);
141         if (node)
142                 return node;
143
144         node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
145         if (!node)
146                 return ERR_PTR(-ENOMEM);
147         btrfs_init_delayed_node(node, root, ino);
148
149         /* cached in the btrfs inode and can be accessed */
150         refcount_set(&node->refs, 2);
151
152         ret = radix_tree_preload(GFP_NOFS);
153         if (ret) {
154                 kmem_cache_free(delayed_node_cache, node);
155                 return ERR_PTR(ret);
156         }
157
158         spin_lock(&root->inode_lock);
159         ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
160         if (ret == -EEXIST) {
161                 spin_unlock(&root->inode_lock);
162                 kmem_cache_free(delayed_node_cache, node);
163                 radix_tree_preload_end();
164                 goto again;
165         }
166         btrfs_inode->delayed_node = node;
167         spin_unlock(&root->inode_lock);
168         radix_tree_preload_end();
169
170         return node;
171 }
172
173 /*
174  * Call it when holding delayed_node->mutex
175  *
176  * If mod = 1, add this node into the prepared list.
177  */
178 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
179                                      struct btrfs_delayed_node *node,
180                                      int mod)
181 {
182         spin_lock(&root->lock);
183         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
184                 if (!list_empty(&node->p_list))
185                         list_move_tail(&node->p_list, &root->prepare_list);
186                 else if (mod)
187                         list_add_tail(&node->p_list, &root->prepare_list);
188         } else {
189                 list_add_tail(&node->n_list, &root->node_list);
190                 list_add_tail(&node->p_list, &root->prepare_list);
191                 refcount_inc(&node->refs);      /* inserted into list */
192                 root->nodes++;
193                 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
194         }
195         spin_unlock(&root->lock);
196 }
197
198 /* Call it when holding delayed_node->mutex */
199 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
200                                        struct btrfs_delayed_node *node)
201 {
202         spin_lock(&root->lock);
203         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
204                 root->nodes--;
205                 refcount_dec(&node->refs);      /* not in the list */
206                 list_del_init(&node->n_list);
207                 if (!list_empty(&node->p_list))
208                         list_del_init(&node->p_list);
209                 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
210         }
211         spin_unlock(&root->lock);
212 }
213
214 static struct btrfs_delayed_node *btrfs_first_delayed_node(
215                         struct btrfs_delayed_root *delayed_root)
216 {
217         struct list_head *p;
218         struct btrfs_delayed_node *node = NULL;
219
220         spin_lock(&delayed_root->lock);
221         if (list_empty(&delayed_root->node_list))
222                 goto out;
223
224         p = delayed_root->node_list.next;
225         node = list_entry(p, struct btrfs_delayed_node, n_list);
226         refcount_inc(&node->refs);
227 out:
228         spin_unlock(&delayed_root->lock);
229
230         return node;
231 }
232
233 static struct btrfs_delayed_node *btrfs_next_delayed_node(
234                                                 struct btrfs_delayed_node *node)
235 {
236         struct btrfs_delayed_root *delayed_root;
237         struct list_head *p;
238         struct btrfs_delayed_node *next = NULL;
239
240         delayed_root = node->root->fs_info->delayed_root;
241         spin_lock(&delayed_root->lock);
242         if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
243                 /* not in the list */
244                 if (list_empty(&delayed_root->node_list))
245                         goto out;
246                 p = delayed_root->node_list.next;
247         } else if (list_is_last(&node->n_list, &delayed_root->node_list))
248                 goto out;
249         else
250                 p = node->n_list.next;
251
252         next = list_entry(p, struct btrfs_delayed_node, n_list);
253         refcount_inc(&next->refs);
254 out:
255         spin_unlock(&delayed_root->lock);
256
257         return next;
258 }
259
260 static void __btrfs_release_delayed_node(
261                                 struct btrfs_delayed_node *delayed_node,
262                                 int mod)
263 {
264         struct btrfs_delayed_root *delayed_root;
265
266         if (!delayed_node)
267                 return;
268
269         delayed_root = delayed_node->root->fs_info->delayed_root;
270
271         mutex_lock(&delayed_node->mutex);
272         if (delayed_node->count)
273                 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
274         else
275                 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
276         mutex_unlock(&delayed_node->mutex);
277
278         if (refcount_dec_and_test(&delayed_node->refs)) {
279                 struct btrfs_root *root = delayed_node->root;
280
281                 spin_lock(&root->inode_lock);
282                 /*
283                  * Once our refcount goes to zero, nobody is allowed to bump it
284                  * back up.  We can delete it now.
285                  */
286                 ASSERT(refcount_read(&delayed_node->refs) == 0);
287                 radix_tree_delete(&root->delayed_nodes_tree,
288                                   delayed_node->inode_id);
289                 spin_unlock(&root->inode_lock);
290                 kmem_cache_free(delayed_node_cache, delayed_node);
291         }
292 }
293
294 static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
295 {
296         __btrfs_release_delayed_node(node, 0);
297 }
298
299 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
300                                         struct btrfs_delayed_root *delayed_root)
301 {
302         struct list_head *p;
303         struct btrfs_delayed_node *node = NULL;
304
305         spin_lock(&delayed_root->lock);
306         if (list_empty(&delayed_root->prepare_list))
307                 goto out;
308
309         p = delayed_root->prepare_list.next;
310         list_del_init(p);
311         node = list_entry(p, struct btrfs_delayed_node, p_list);
312         refcount_inc(&node->refs);
313 out:
314         spin_unlock(&delayed_root->lock);
315
316         return node;
317 }
318
319 static inline void btrfs_release_prepared_delayed_node(
320                                         struct btrfs_delayed_node *node)
321 {
322         __btrfs_release_delayed_node(node, 1);
323 }
324
325 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
326 {
327         struct btrfs_delayed_item *item;
328         item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
329         if (item) {
330                 item->data_len = data_len;
331                 item->ins_or_del = 0;
332                 item->bytes_reserved = 0;
333                 item->delayed_node = NULL;
334                 refcount_set(&item->refs, 1);
335         }
336         return item;
337 }
338
339 /*
340  * __btrfs_lookup_delayed_item - look up the delayed item by key
341  * @delayed_node: pointer to the delayed node
342  * @key:          the key to look up
343  * @prev:         used to store the prev item if the right item isn't found
344  * @next:         used to store the next item if the right item isn't found
345  *
346  * Note: if we don't find the right item, we will return the prev item and
347  * the next item.
348  */
349 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
350                                 struct rb_root *root,
351                                 struct btrfs_key *key,
352                                 struct btrfs_delayed_item **prev,
353                                 struct btrfs_delayed_item **next)
354 {
355         struct rb_node *node, *prev_node = NULL;
356         struct btrfs_delayed_item *delayed_item = NULL;
357         int ret = 0;
358
359         node = root->rb_node;
360
361         while (node) {
362                 delayed_item = rb_entry(node, struct btrfs_delayed_item,
363                                         rb_node);
364                 prev_node = node;
365                 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
366                 if (ret < 0)
367                         node = node->rb_right;
368                 else if (ret > 0)
369                         node = node->rb_left;
370                 else
371                         return delayed_item;
372         }
373
374         if (prev) {
375                 if (!prev_node)
376                         *prev = NULL;
377                 else if (ret < 0)
378                         *prev = delayed_item;
379                 else if ((node = rb_prev(prev_node)) != NULL) {
380                         *prev = rb_entry(node, struct btrfs_delayed_item,
381                                          rb_node);
382                 } else
383                         *prev = NULL;
384         }
385
386         if (next) {
387                 if (!prev_node)
388                         *next = NULL;
389                 else if (ret > 0)
390                         *next = delayed_item;
391                 else if ((node = rb_next(prev_node)) != NULL) {
392                         *next = rb_entry(node, struct btrfs_delayed_item,
393                                          rb_node);
394                 } else
395                         *next = NULL;
396         }
397         return NULL;
398 }
399
400 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
401                                         struct btrfs_delayed_node *delayed_node,
402                                         struct btrfs_key *key)
403 {
404         return __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
405                                            NULL, NULL);
406 }
407
408 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
409                                     struct btrfs_delayed_item *ins,
410                                     int action)
411 {
412         struct rb_node **p, *node;
413         struct rb_node *parent_node = NULL;
414         struct rb_root *root;
415         struct btrfs_delayed_item *item;
416         int cmp;
417
418         if (action == BTRFS_DELAYED_INSERTION_ITEM)
419                 root = &delayed_node->ins_root;
420         else if (action == BTRFS_DELAYED_DELETION_ITEM)
421                 root = &delayed_node->del_root;
422         else
423                 BUG();
424         p = &root->rb_node;
425         node = &ins->rb_node;
426
427         while (*p) {
428                 parent_node = *p;
429                 item = rb_entry(parent_node, struct btrfs_delayed_item,
430                                  rb_node);
431
432                 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
433                 if (cmp < 0)
434                         p = &(*p)->rb_right;
435                 else if (cmp > 0)
436                         p = &(*p)->rb_left;
437                 else
438                         return -EEXIST;
439         }
440
441         rb_link_node(node, parent_node, p);
442         rb_insert_color(node, root);
443         ins->delayed_node = delayed_node;
444         ins->ins_or_del = action;
445
446         if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
447             action == BTRFS_DELAYED_INSERTION_ITEM &&
448             ins->key.offset >= delayed_node->index_cnt)
449                         delayed_node->index_cnt = ins->key.offset + 1;
450
451         delayed_node->count++;
452         atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
453         return 0;
454 }
455
456 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
457                                               struct btrfs_delayed_item *item)
458 {
459         return __btrfs_add_delayed_item(node, item,
460                                         BTRFS_DELAYED_INSERTION_ITEM);
461 }
462
463 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
464                                              struct btrfs_delayed_item *item)
465 {
466         return __btrfs_add_delayed_item(node, item,
467                                         BTRFS_DELAYED_DELETION_ITEM);
468 }
469
470 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
471 {
472         int seq = atomic_inc_return(&delayed_root->items_seq);
473
474         /*
475          * atomic_dec_return implies a barrier for waitqueue_active
476          */
477         if ((atomic_dec_return(&delayed_root->items) <
478             BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
479             waitqueue_active(&delayed_root->wait))
480                 wake_up(&delayed_root->wait);
481 }
482
483 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
484 {
485         struct rb_root *root;
486         struct btrfs_delayed_root *delayed_root;
487
488         delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
489
490         BUG_ON(!delayed_root);
491         BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
492                delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
493
494         if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
495                 root = &delayed_item->delayed_node->ins_root;
496         else
497                 root = &delayed_item->delayed_node->del_root;
498
499         rb_erase(&delayed_item->rb_node, root);
500         delayed_item->delayed_node->count--;
501
502         finish_one_item(delayed_root);
503 }
504
505 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
506 {
507         if (item) {
508                 __btrfs_remove_delayed_item(item);
509                 if (refcount_dec_and_test(&item->refs))
510                         kfree(item);
511         }
512 }
513
514 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
515                                         struct btrfs_delayed_node *delayed_node)
516 {
517         struct rb_node *p;
518         struct btrfs_delayed_item *item = NULL;
519
520         p = rb_first(&delayed_node->ins_root);
521         if (p)
522                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
523
524         return item;
525 }
526
527 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
528                                         struct btrfs_delayed_node *delayed_node)
529 {
530         struct rb_node *p;
531         struct btrfs_delayed_item *item = NULL;
532
533         p = rb_first(&delayed_node->del_root);
534         if (p)
535                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
536
537         return item;
538 }
539
540 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
541                                                 struct btrfs_delayed_item *item)
542 {
543         struct rb_node *p;
544         struct btrfs_delayed_item *next = NULL;
545
546         p = rb_next(&item->rb_node);
547         if (p)
548                 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
549
550         return next;
551 }
552
553 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
554                                                struct btrfs_fs_info *fs_info,
555                                                struct btrfs_delayed_item *item)
556 {
557         struct btrfs_block_rsv *src_rsv;
558         struct btrfs_block_rsv *dst_rsv;
559         u64 num_bytes;
560         int ret;
561
562         if (!trans->bytes_reserved)
563                 return 0;
564
565         src_rsv = trans->block_rsv;
566         dst_rsv = &fs_info->delayed_block_rsv;
567
568         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
569         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
570         if (!ret) {
571                 trace_btrfs_space_reservation(fs_info, "delayed_item",
572                                               item->key.objectid,
573                                               num_bytes, 1);
574                 item->bytes_reserved = num_bytes;
575         }
576
577         return ret;
578 }
579
580 static void btrfs_delayed_item_release_metadata(struct btrfs_fs_info *fs_info,
581                                                 struct btrfs_delayed_item *item)
582 {
583         struct btrfs_block_rsv *rsv;
584
585         if (!item->bytes_reserved)
586                 return;
587
588         rsv = &fs_info->delayed_block_rsv;
589         trace_btrfs_space_reservation(fs_info, "delayed_item",
590                                       item->key.objectid, item->bytes_reserved,
591                                       0);
592         btrfs_block_rsv_release(fs_info, rsv,
593                                 item->bytes_reserved);
594 }
595
596 static int btrfs_delayed_inode_reserve_metadata(
597                                         struct btrfs_trans_handle *trans,
598                                         struct btrfs_root *root,
599                                         struct btrfs_inode *inode,
600                                         struct btrfs_delayed_node *node)
601 {
602         struct btrfs_fs_info *fs_info = root->fs_info;
603         struct btrfs_block_rsv *src_rsv;
604         struct btrfs_block_rsv *dst_rsv;
605         u64 num_bytes;
606         int ret;
607
608         src_rsv = trans->block_rsv;
609         dst_rsv = &fs_info->delayed_block_rsv;
610
611         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
612
613         /*
614          * btrfs_dirty_inode will update the inode under btrfs_join_transaction
615          * which doesn't reserve space for speed.  This is a problem since we
616          * still need to reserve space for this update, so try to reserve the
617          * space.
618          *
619          * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
620          * we always reserve enough to update the inode item.
621          */
622         if (!src_rsv || (!trans->bytes_reserved &&
623                          src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
624                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
625                                           BTRFS_RESERVE_NO_FLUSH);
626                 /*
627                  * Since we're under a transaction reserve_metadata_bytes could
628                  * try to commit the transaction which will make it return
629                  * EAGAIN to make us stop the transaction we have, so return
630                  * ENOSPC instead so that btrfs_dirty_inode knows what to do.
631                  */
632                 if (ret == -EAGAIN)
633                         ret = -ENOSPC;
634                 if (!ret) {
635                         node->bytes_reserved = num_bytes;
636                         trace_btrfs_space_reservation(fs_info,
637                                                       "delayed_inode",
638                                                       btrfs_ino(inode),
639                                                       num_bytes, 1);
640                 }
641                 return ret;
642         }
643
644         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
645         if (!ret) {
646                 trace_btrfs_space_reservation(fs_info, "delayed_inode",
647                                               btrfs_ino(inode), num_bytes, 1);
648                 node->bytes_reserved = num_bytes;
649         }
650
651         return ret;
652 }
653
654 static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
655                                                 struct btrfs_delayed_node *node)
656 {
657         struct btrfs_block_rsv *rsv;
658
659         if (!node->bytes_reserved)
660                 return;
661
662         rsv = &fs_info->delayed_block_rsv;
663         trace_btrfs_space_reservation(fs_info, "delayed_inode",
664                                       node->inode_id, node->bytes_reserved, 0);
665         btrfs_block_rsv_release(fs_info, rsv,
666                                 node->bytes_reserved);
667         node->bytes_reserved = 0;
668 }
669
670 /*
671  * This helper will insert some continuous items into the same leaf according
672  * to the free space of the leaf.
673  */
674 static int btrfs_batch_insert_items(struct btrfs_root *root,
675                                     struct btrfs_path *path,
676                                     struct btrfs_delayed_item *item)
677 {
678         struct btrfs_fs_info *fs_info = root->fs_info;
679         struct btrfs_delayed_item *curr, *next;
680         int free_space;
681         int total_data_size = 0, total_size = 0;
682         struct extent_buffer *leaf;
683         char *data_ptr;
684         struct btrfs_key *keys;
685         u32 *data_size;
686         struct list_head head;
687         int slot;
688         int nitems;
689         int i;
690         int ret = 0;
691
692         BUG_ON(!path->nodes[0]);
693
694         leaf = path->nodes[0];
695         free_space = btrfs_leaf_free_space(fs_info, leaf);
696         INIT_LIST_HEAD(&head);
697
698         next = item;
699         nitems = 0;
700
701         /*
702          * count the number of the continuous items that we can insert in batch
703          */
704         while (total_size + next->data_len + sizeof(struct btrfs_item) <=
705                free_space) {
706                 total_data_size += next->data_len;
707                 total_size += next->data_len + sizeof(struct btrfs_item);
708                 list_add_tail(&next->tree_list, &head);
709                 nitems++;
710
711                 curr = next;
712                 next = __btrfs_next_delayed_item(curr);
713                 if (!next)
714                         break;
715
716                 if (!btrfs_is_continuous_delayed_item(curr, next))
717                         break;
718         }
719
720         if (!nitems) {
721                 ret = 0;
722                 goto out;
723         }
724
725         /*
726          * we need allocate some memory space, but it might cause the task
727          * to sleep, so we set all locked nodes in the path to blocking locks
728          * first.
729          */
730         btrfs_set_path_blocking(path);
731
732         keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
733         if (!keys) {
734                 ret = -ENOMEM;
735                 goto out;
736         }
737
738         data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
739         if (!data_size) {
740                 ret = -ENOMEM;
741                 goto error;
742         }
743
744         /* get keys of all the delayed items */
745         i = 0;
746         list_for_each_entry(next, &head, tree_list) {
747                 keys[i] = next->key;
748                 data_size[i] = next->data_len;
749                 i++;
750         }
751
752         /* reset all the locked nodes in the patch to spinning locks. */
753         btrfs_clear_path_blocking(path, NULL, 0);
754
755         /* insert the keys of the items */
756         setup_items_for_insert(root, path, keys, data_size,
757                                total_data_size, total_size, nitems);
758
759         /* insert the dir index items */
760         slot = path->slots[0];
761         list_for_each_entry_safe(curr, next, &head, tree_list) {
762                 data_ptr = btrfs_item_ptr(leaf, slot, char);
763                 write_extent_buffer(leaf, &curr->data,
764                                     (unsigned long)data_ptr,
765                                     curr->data_len);
766                 slot++;
767
768                 btrfs_delayed_item_release_metadata(fs_info, curr);
769
770                 list_del(&curr->tree_list);
771                 btrfs_release_delayed_item(curr);
772         }
773
774 error:
775         kfree(data_size);
776         kfree(keys);
777 out:
778         return ret;
779 }
780
781 /*
782  * This helper can just do simple insertion that needn't extend item for new
783  * data, such as directory name index insertion, inode insertion.
784  */
785 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
786                                      struct btrfs_root *root,
787                                      struct btrfs_path *path,
788                                      struct btrfs_delayed_item *delayed_item)
789 {
790         struct btrfs_fs_info *fs_info = root->fs_info;
791         struct extent_buffer *leaf;
792         char *ptr;
793         int ret;
794
795         ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
796                                       delayed_item->data_len);
797         if (ret < 0 && ret != -EEXIST)
798                 return ret;
799
800         leaf = path->nodes[0];
801
802         ptr = btrfs_item_ptr(leaf, path->slots[0], char);
803
804         write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
805                             delayed_item->data_len);
806         btrfs_mark_buffer_dirty(leaf);
807
808         btrfs_delayed_item_release_metadata(fs_info, delayed_item);
809         return 0;
810 }
811
812 /*
813  * we insert an item first, then if there are some continuous items, we try
814  * to insert those items into the same leaf.
815  */
816 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
817                                       struct btrfs_path *path,
818                                       struct btrfs_root *root,
819                                       struct btrfs_delayed_node *node)
820 {
821         struct btrfs_delayed_item *curr, *prev;
822         int ret = 0;
823
824 do_again:
825         mutex_lock(&node->mutex);
826         curr = __btrfs_first_delayed_insertion_item(node);
827         if (!curr)
828                 goto insert_end;
829
830         ret = btrfs_insert_delayed_item(trans, root, path, curr);
831         if (ret < 0) {
832                 btrfs_release_path(path);
833                 goto insert_end;
834         }
835
836         prev = curr;
837         curr = __btrfs_next_delayed_item(prev);
838         if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
839                 /* insert the continuous items into the same leaf */
840                 path->slots[0]++;
841                 btrfs_batch_insert_items(root, path, curr);
842         }
843         btrfs_release_delayed_item(prev);
844         btrfs_mark_buffer_dirty(path->nodes[0]);
845
846         btrfs_release_path(path);
847         mutex_unlock(&node->mutex);
848         goto do_again;
849
850 insert_end:
851         mutex_unlock(&node->mutex);
852         return ret;
853 }
854
855 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
856                                     struct btrfs_root *root,
857                                     struct btrfs_path *path,
858                                     struct btrfs_delayed_item *item)
859 {
860         struct btrfs_fs_info *fs_info = root->fs_info;
861         struct btrfs_delayed_item *curr, *next;
862         struct extent_buffer *leaf;
863         struct btrfs_key key;
864         struct list_head head;
865         int nitems, i, last_item;
866         int ret = 0;
867
868         BUG_ON(!path->nodes[0]);
869
870         leaf = path->nodes[0];
871
872         i = path->slots[0];
873         last_item = btrfs_header_nritems(leaf) - 1;
874         if (i > last_item)
875                 return -ENOENT; /* FIXME: Is errno suitable? */
876
877         next = item;
878         INIT_LIST_HEAD(&head);
879         btrfs_item_key_to_cpu(leaf, &key, i);
880         nitems = 0;
881         /*
882          * count the number of the dir index items that we can delete in batch
883          */
884         while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
885                 list_add_tail(&next->tree_list, &head);
886                 nitems++;
887
888                 curr = next;
889                 next = __btrfs_next_delayed_item(curr);
890                 if (!next)
891                         break;
892
893                 if (!btrfs_is_continuous_delayed_item(curr, next))
894                         break;
895
896                 i++;
897                 if (i > last_item)
898                         break;
899                 btrfs_item_key_to_cpu(leaf, &key, i);
900         }
901
902         if (!nitems)
903                 return 0;
904
905         ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
906         if (ret)
907                 goto out;
908
909         list_for_each_entry_safe(curr, next, &head, tree_list) {
910                 btrfs_delayed_item_release_metadata(fs_info, curr);
911                 list_del(&curr->tree_list);
912                 btrfs_release_delayed_item(curr);
913         }
914
915 out:
916         return ret;
917 }
918
919 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
920                                       struct btrfs_path *path,
921                                       struct btrfs_root *root,
922                                       struct btrfs_delayed_node *node)
923 {
924         struct btrfs_delayed_item *curr, *prev;
925         int ret = 0;
926
927 do_again:
928         mutex_lock(&node->mutex);
929         curr = __btrfs_first_delayed_deletion_item(node);
930         if (!curr)
931                 goto delete_fail;
932
933         ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
934         if (ret < 0)
935                 goto delete_fail;
936         else if (ret > 0) {
937                 /*
938                  * can't find the item which the node points to, so this node
939                  * is invalid, just drop it.
940                  */
941                 prev = curr;
942                 curr = __btrfs_next_delayed_item(prev);
943                 btrfs_release_delayed_item(prev);
944                 ret = 0;
945                 btrfs_release_path(path);
946                 if (curr) {
947                         mutex_unlock(&node->mutex);
948                         goto do_again;
949                 } else
950                         goto delete_fail;
951         }
952
953         btrfs_batch_delete_items(trans, root, path, curr);
954         btrfs_release_path(path);
955         mutex_unlock(&node->mutex);
956         goto do_again;
957
958 delete_fail:
959         btrfs_release_path(path);
960         mutex_unlock(&node->mutex);
961         return ret;
962 }
963
964 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
965 {
966         struct btrfs_delayed_root *delayed_root;
967
968         if (delayed_node &&
969             test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
970                 BUG_ON(!delayed_node->root);
971                 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
972                 delayed_node->count--;
973
974                 delayed_root = delayed_node->root->fs_info->delayed_root;
975                 finish_one_item(delayed_root);
976         }
977 }
978
979 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
980 {
981         struct btrfs_delayed_root *delayed_root;
982
983         ASSERT(delayed_node->root);
984         clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
985         delayed_node->count--;
986
987         delayed_root = delayed_node->root->fs_info->delayed_root;
988         finish_one_item(delayed_root);
989 }
990
991 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
992                                         struct btrfs_root *root,
993                                         struct btrfs_path *path,
994                                         struct btrfs_delayed_node *node)
995 {
996         struct btrfs_fs_info *fs_info = root->fs_info;
997         struct btrfs_key key;
998         struct btrfs_inode_item *inode_item;
999         struct extent_buffer *leaf;
1000         int mod;
1001         int ret;
1002
1003         key.objectid = node->inode_id;
1004         key.type = BTRFS_INODE_ITEM_KEY;
1005         key.offset = 0;
1006
1007         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1008                 mod = -1;
1009         else
1010                 mod = 1;
1011
1012         ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1013         if (ret > 0) {
1014                 btrfs_release_path(path);
1015                 return -ENOENT;
1016         } else if (ret < 0) {
1017                 return ret;
1018         }
1019
1020         leaf = path->nodes[0];
1021         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1022                                     struct btrfs_inode_item);
1023         write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1024                             sizeof(struct btrfs_inode_item));
1025         btrfs_mark_buffer_dirty(leaf);
1026
1027         if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1028                 goto no_iref;
1029
1030         path->slots[0]++;
1031         if (path->slots[0] >= btrfs_header_nritems(leaf))
1032                 goto search;
1033 again:
1034         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1035         if (key.objectid != node->inode_id)
1036                 goto out;
1037
1038         if (key.type != BTRFS_INODE_REF_KEY &&
1039             key.type != BTRFS_INODE_EXTREF_KEY)
1040                 goto out;
1041
1042         /*
1043          * Delayed iref deletion is for the inode who has only one link,
1044          * so there is only one iref. The case that several irefs are
1045          * in the same item doesn't exist.
1046          */
1047         btrfs_del_item(trans, root, path);
1048 out:
1049         btrfs_release_delayed_iref(node);
1050 no_iref:
1051         btrfs_release_path(path);
1052 err_out:
1053         btrfs_delayed_inode_release_metadata(fs_info, node);
1054         btrfs_release_delayed_inode(node);
1055
1056         return ret;
1057
1058 search:
1059         btrfs_release_path(path);
1060
1061         key.type = BTRFS_INODE_EXTREF_KEY;
1062         key.offset = -1;
1063         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1064         if (ret < 0)
1065                 goto err_out;
1066         ASSERT(ret);
1067
1068         ret = 0;
1069         leaf = path->nodes[0];
1070         path->slots[0]--;
1071         goto again;
1072 }
1073
1074 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1075                                              struct btrfs_root *root,
1076                                              struct btrfs_path *path,
1077                                              struct btrfs_delayed_node *node)
1078 {
1079         int ret;
1080
1081         mutex_lock(&node->mutex);
1082         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1083                 mutex_unlock(&node->mutex);
1084                 return 0;
1085         }
1086
1087         ret = __btrfs_update_delayed_inode(trans, root, path, node);
1088         mutex_unlock(&node->mutex);
1089         return ret;
1090 }
1091
1092 static inline int
1093 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1094                                    struct btrfs_path *path,
1095                                    struct btrfs_delayed_node *node)
1096 {
1097         int ret;
1098
1099         ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1100         if (ret)
1101                 return ret;
1102
1103         ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1104         if (ret)
1105                 return ret;
1106
1107         ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1108         return ret;
1109 }
1110
1111 /*
1112  * Called when committing the transaction.
1113  * Returns 0 on success.
1114  * Returns < 0 on error and returns with an aborted transaction with any
1115  * outstanding delayed items cleaned up.
1116  */
1117 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1118                                      struct btrfs_fs_info *fs_info, int nr)
1119 {
1120         struct btrfs_delayed_root *delayed_root;
1121         struct btrfs_delayed_node *curr_node, *prev_node;
1122         struct btrfs_path *path;
1123         struct btrfs_block_rsv *block_rsv;
1124         int ret = 0;
1125         bool count = (nr > 0);
1126
1127         if (trans->aborted)
1128                 return -EIO;
1129
1130         path = btrfs_alloc_path();
1131         if (!path)
1132                 return -ENOMEM;
1133         path->leave_spinning = 1;
1134
1135         block_rsv = trans->block_rsv;
1136         trans->block_rsv = &fs_info->delayed_block_rsv;
1137
1138         delayed_root = fs_info->delayed_root;
1139
1140         curr_node = btrfs_first_delayed_node(delayed_root);
1141         while (curr_node && (!count || (count && nr--))) {
1142                 ret = __btrfs_commit_inode_delayed_items(trans, path,
1143                                                          curr_node);
1144                 if (ret) {
1145                         btrfs_release_delayed_node(curr_node);
1146                         curr_node = NULL;
1147                         btrfs_abort_transaction(trans, ret);
1148                         break;
1149                 }
1150
1151                 prev_node = curr_node;
1152                 curr_node = btrfs_next_delayed_node(curr_node);
1153                 btrfs_release_delayed_node(prev_node);
1154         }
1155
1156         if (curr_node)
1157                 btrfs_release_delayed_node(curr_node);
1158         btrfs_free_path(path);
1159         trans->block_rsv = block_rsv;
1160
1161         return ret;
1162 }
1163
1164 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1165                             struct btrfs_fs_info *fs_info)
1166 {
1167         return __btrfs_run_delayed_items(trans, fs_info, -1);
1168 }
1169
1170 int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
1171                                struct btrfs_fs_info *fs_info, int nr)
1172 {
1173         return __btrfs_run_delayed_items(trans, fs_info, nr);
1174 }
1175
1176 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1177                                      struct btrfs_inode *inode)
1178 {
1179         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1180         struct btrfs_path *path;
1181         struct btrfs_block_rsv *block_rsv;
1182         int ret;
1183
1184         if (!delayed_node)
1185                 return 0;
1186
1187         mutex_lock(&delayed_node->mutex);
1188         if (!delayed_node->count) {
1189                 mutex_unlock(&delayed_node->mutex);
1190                 btrfs_release_delayed_node(delayed_node);
1191                 return 0;
1192         }
1193         mutex_unlock(&delayed_node->mutex);
1194
1195         path = btrfs_alloc_path();
1196         if (!path) {
1197                 btrfs_release_delayed_node(delayed_node);
1198                 return -ENOMEM;
1199         }
1200         path->leave_spinning = 1;
1201
1202         block_rsv = trans->block_rsv;
1203         trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1204
1205         ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1206
1207         btrfs_release_delayed_node(delayed_node);
1208         btrfs_free_path(path);
1209         trans->block_rsv = block_rsv;
1210
1211         return ret;
1212 }
1213
1214 int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
1215 {
1216         struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
1217         struct btrfs_trans_handle *trans;
1218         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1219         struct btrfs_path *path;
1220         struct btrfs_block_rsv *block_rsv;
1221         int ret;
1222
1223         if (!delayed_node)
1224                 return 0;
1225
1226         mutex_lock(&delayed_node->mutex);
1227         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1228                 mutex_unlock(&delayed_node->mutex);
1229                 btrfs_release_delayed_node(delayed_node);
1230                 return 0;
1231         }
1232         mutex_unlock(&delayed_node->mutex);
1233
1234         trans = btrfs_join_transaction(delayed_node->root);
1235         if (IS_ERR(trans)) {
1236                 ret = PTR_ERR(trans);
1237                 goto out;
1238         }
1239
1240         path = btrfs_alloc_path();
1241         if (!path) {
1242                 ret = -ENOMEM;
1243                 goto trans_out;
1244         }
1245         path->leave_spinning = 1;
1246
1247         block_rsv = trans->block_rsv;
1248         trans->block_rsv = &fs_info->delayed_block_rsv;
1249
1250         mutex_lock(&delayed_node->mutex);
1251         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1252                 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1253                                                    path, delayed_node);
1254         else
1255                 ret = 0;
1256         mutex_unlock(&delayed_node->mutex);
1257
1258         btrfs_free_path(path);
1259         trans->block_rsv = block_rsv;
1260 trans_out:
1261         btrfs_end_transaction(trans);
1262         btrfs_btree_balance_dirty(fs_info);
1263 out:
1264         btrfs_release_delayed_node(delayed_node);
1265
1266         return ret;
1267 }
1268
1269 void btrfs_remove_delayed_node(struct btrfs_inode *inode)
1270 {
1271         struct btrfs_delayed_node *delayed_node;
1272
1273         delayed_node = READ_ONCE(inode->delayed_node);
1274         if (!delayed_node)
1275                 return;
1276
1277         inode->delayed_node = NULL;
1278         btrfs_release_delayed_node(delayed_node);
1279 }
1280
1281 struct btrfs_async_delayed_work {
1282         struct btrfs_delayed_root *delayed_root;
1283         int nr;
1284         struct btrfs_work work;
1285 };
1286
1287 static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1288 {
1289         struct btrfs_async_delayed_work *async_work;
1290         struct btrfs_delayed_root *delayed_root;
1291         struct btrfs_trans_handle *trans;
1292         struct btrfs_path *path;
1293         struct btrfs_delayed_node *delayed_node = NULL;
1294         struct btrfs_root *root;
1295         struct btrfs_block_rsv *block_rsv;
1296         int total_done = 0;
1297
1298         async_work = container_of(work, struct btrfs_async_delayed_work, work);
1299         delayed_root = async_work->delayed_root;
1300
1301         path = btrfs_alloc_path();
1302         if (!path)
1303                 goto out;
1304
1305         do {
1306                 if (atomic_read(&delayed_root->items) <
1307                     BTRFS_DELAYED_BACKGROUND / 2)
1308                         break;
1309
1310                 delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1311                 if (!delayed_node)
1312                         break;
1313
1314                 path->leave_spinning = 1;
1315                 root = delayed_node->root;
1316
1317                 trans = btrfs_join_transaction(root);
1318                 if (IS_ERR(trans)) {
1319                         btrfs_release_path(path);
1320                         btrfs_release_prepared_delayed_node(delayed_node);
1321                         total_done++;
1322                         continue;
1323                 }
1324
1325                 block_rsv = trans->block_rsv;
1326                 trans->block_rsv = &root->fs_info->delayed_block_rsv;
1327
1328                 __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1329
1330                 trans->block_rsv = block_rsv;
1331                 btrfs_end_transaction(trans);
1332                 btrfs_btree_balance_dirty_nodelay(root->fs_info);
1333
1334                 btrfs_release_path(path);
1335                 btrfs_release_prepared_delayed_node(delayed_node);
1336                 total_done++;
1337
1338         } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
1339                  || total_done < async_work->nr);
1340
1341         btrfs_free_path(path);
1342 out:
1343         wake_up(&delayed_root->wait);
1344         kfree(async_work);
1345 }
1346
1347
1348 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1349                                      struct btrfs_fs_info *fs_info, int nr)
1350 {
1351         struct btrfs_async_delayed_work *async_work;
1352
1353         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
1354             btrfs_workqueue_normal_congested(fs_info->delayed_workers))
1355                 return 0;
1356
1357         async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1358         if (!async_work)
1359                 return -ENOMEM;
1360
1361         async_work->delayed_root = delayed_root;
1362         btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1363                         btrfs_async_run_delayed_root, NULL, NULL);
1364         async_work->nr = nr;
1365
1366         btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1367         return 0;
1368 }
1369
1370 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
1371 {
1372         WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
1373 }
1374
1375 static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1376 {
1377         int val = atomic_read(&delayed_root->items_seq);
1378
1379         if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1380                 return 1;
1381
1382         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1383                 return 1;
1384
1385         return 0;
1386 }
1387
1388 void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
1389 {
1390         struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
1391
1392         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1393                 return;
1394
1395         if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1396                 int seq;
1397                 int ret;
1398
1399                 seq = atomic_read(&delayed_root->items_seq);
1400
1401                 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1402                 if (ret)
1403                         return;
1404
1405                 wait_event_interruptible(delayed_root->wait,
1406                                          could_end_wait(delayed_root, seq));
1407                 return;
1408         }
1409
1410         btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1411 }
1412
1413 /* Will return 0 or -ENOMEM */
1414 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1415                                    struct btrfs_fs_info *fs_info,
1416                                    const char *name, int name_len,
1417                                    struct btrfs_inode *dir,
1418                                    struct btrfs_disk_key *disk_key, u8 type,
1419                                    u64 index)
1420 {
1421         struct btrfs_delayed_node *delayed_node;
1422         struct btrfs_delayed_item *delayed_item;
1423         struct btrfs_dir_item *dir_item;
1424         int ret;
1425
1426         delayed_node = btrfs_get_or_create_delayed_node(dir);
1427         if (IS_ERR(delayed_node))
1428                 return PTR_ERR(delayed_node);
1429
1430         delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1431         if (!delayed_item) {
1432                 ret = -ENOMEM;
1433                 goto release_node;
1434         }
1435
1436         delayed_item->key.objectid = btrfs_ino(dir);
1437         delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
1438         delayed_item->key.offset = index;
1439
1440         dir_item = (struct btrfs_dir_item *)delayed_item->data;
1441         dir_item->location = *disk_key;
1442         btrfs_set_stack_dir_transid(dir_item, trans->transid);
1443         btrfs_set_stack_dir_data_len(dir_item, 0);
1444         btrfs_set_stack_dir_name_len(dir_item, name_len);
1445         btrfs_set_stack_dir_type(dir_item, type);
1446         memcpy((char *)(dir_item + 1), name, name_len);
1447
1448         ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, delayed_item);
1449         /*
1450          * we have reserved enough space when we start a new transaction,
1451          * so reserving metadata failure is impossible
1452          */
1453         BUG_ON(ret);
1454
1455
1456         mutex_lock(&delayed_node->mutex);
1457         ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1458         if (unlikely(ret)) {
1459                 btrfs_err(fs_info,
1460                           "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1461                           name_len, name, delayed_node->root->objectid,
1462                           delayed_node->inode_id, ret);
1463                 BUG();
1464         }
1465         mutex_unlock(&delayed_node->mutex);
1466
1467 release_node:
1468         btrfs_release_delayed_node(delayed_node);
1469         return ret;
1470 }
1471
1472 static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
1473                                                struct btrfs_delayed_node *node,
1474                                                struct btrfs_key *key)
1475 {
1476         struct btrfs_delayed_item *item;
1477
1478         mutex_lock(&node->mutex);
1479         item = __btrfs_lookup_delayed_insertion_item(node, key);
1480         if (!item) {
1481                 mutex_unlock(&node->mutex);
1482                 return 1;
1483         }
1484
1485         btrfs_delayed_item_release_metadata(fs_info, item);
1486         btrfs_release_delayed_item(item);
1487         mutex_unlock(&node->mutex);
1488         return 0;
1489 }
1490
1491 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1492                                    struct btrfs_fs_info *fs_info,
1493                                    struct btrfs_inode *dir, u64 index)
1494 {
1495         struct btrfs_delayed_node *node;
1496         struct btrfs_delayed_item *item;
1497         struct btrfs_key item_key;
1498         int ret;
1499
1500         node = btrfs_get_or_create_delayed_node(dir);
1501         if (IS_ERR(node))
1502                 return PTR_ERR(node);
1503
1504         item_key.objectid = btrfs_ino(dir);
1505         item_key.type = BTRFS_DIR_INDEX_KEY;
1506         item_key.offset = index;
1507
1508         ret = btrfs_delete_delayed_insertion_item(fs_info, node, &item_key);
1509         if (!ret)
1510                 goto end;
1511
1512         item = btrfs_alloc_delayed_item(0);
1513         if (!item) {
1514                 ret = -ENOMEM;
1515                 goto end;
1516         }
1517
1518         item->key = item_key;
1519
1520         ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, item);
1521         /*
1522          * we have reserved enough space when we start a new transaction,
1523          * so reserving metadata failure is impossible.
1524          */
1525         BUG_ON(ret);
1526
1527         mutex_lock(&node->mutex);
1528         ret = __btrfs_add_delayed_deletion_item(node, item);
1529         if (unlikely(ret)) {
1530                 btrfs_err(fs_info,
1531                           "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1532                           index, node->root->objectid, node->inode_id, ret);
1533                 BUG();
1534         }
1535         mutex_unlock(&node->mutex);
1536 end:
1537         btrfs_release_delayed_node(node);
1538         return ret;
1539 }
1540
1541 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
1542 {
1543         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1544
1545         if (!delayed_node)
1546                 return -ENOENT;
1547
1548         /*
1549          * Since we have held i_mutex of this directory, it is impossible that
1550          * a new directory index is added into the delayed node and index_cnt
1551          * is updated now. So we needn't lock the delayed node.
1552          */
1553         if (!delayed_node->index_cnt) {
1554                 btrfs_release_delayed_node(delayed_node);
1555                 return -EINVAL;
1556         }
1557
1558         inode->index_cnt = delayed_node->index_cnt;
1559         btrfs_release_delayed_node(delayed_node);
1560         return 0;
1561 }
1562
1563 bool btrfs_readdir_get_delayed_items(struct inode *inode,
1564                                      struct list_head *ins_list,
1565                                      struct list_head *del_list)
1566 {
1567         struct btrfs_delayed_node *delayed_node;
1568         struct btrfs_delayed_item *item;
1569
1570         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1571         if (!delayed_node)
1572                 return false;
1573
1574         /*
1575          * We can only do one readdir with delayed items at a time because of
1576          * item->readdir_list.
1577          */
1578         inode_unlock_shared(inode);
1579         inode_lock(inode);
1580
1581         mutex_lock(&delayed_node->mutex);
1582         item = __btrfs_first_delayed_insertion_item(delayed_node);
1583         while (item) {
1584                 refcount_inc(&item->refs);
1585                 list_add_tail(&item->readdir_list, ins_list);
1586                 item = __btrfs_next_delayed_item(item);
1587         }
1588
1589         item = __btrfs_first_delayed_deletion_item(delayed_node);
1590         while (item) {
1591                 refcount_inc(&item->refs);
1592                 list_add_tail(&item->readdir_list, del_list);
1593                 item = __btrfs_next_delayed_item(item);
1594         }
1595         mutex_unlock(&delayed_node->mutex);
1596         /*
1597          * This delayed node is still cached in the btrfs inode, so refs
1598          * must be > 1 now, and we needn't check it is going to be freed
1599          * or not.
1600          *
1601          * Besides that, this function is used to read dir, we do not
1602          * insert/delete delayed items in this period. So we also needn't
1603          * requeue or dequeue this delayed node.
1604          */
1605         refcount_dec(&delayed_node->refs);
1606
1607         return true;
1608 }
1609
1610 void btrfs_readdir_put_delayed_items(struct inode *inode,
1611                                      struct list_head *ins_list,
1612                                      struct list_head *del_list)
1613 {
1614         struct btrfs_delayed_item *curr, *next;
1615
1616         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1617                 list_del(&curr->readdir_list);
1618                 if (refcount_dec_and_test(&curr->refs))
1619                         kfree(curr);
1620         }
1621
1622         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1623                 list_del(&curr->readdir_list);
1624                 if (refcount_dec_and_test(&curr->refs))
1625                         kfree(curr);
1626         }
1627
1628         /*
1629          * The VFS is going to do up_read(), so we need to downgrade back to a
1630          * read lock.
1631          */
1632         downgrade_write(&inode->i_rwsem);
1633 }
1634
1635 int btrfs_should_delete_dir_index(struct list_head *del_list,
1636                                   u64 index)
1637 {
1638         struct btrfs_delayed_item *curr, *next;
1639         int ret;
1640
1641         if (list_empty(del_list))
1642                 return 0;
1643
1644         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1645                 if (curr->key.offset > index)
1646                         break;
1647
1648                 list_del(&curr->readdir_list);
1649                 ret = (curr->key.offset == index);
1650
1651                 if (refcount_dec_and_test(&curr->refs))
1652                         kfree(curr);
1653
1654                 if (ret)
1655                         return 1;
1656                 else
1657                         continue;
1658         }
1659         return 0;
1660 }
1661
1662 /*
1663  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1664  *
1665  */
1666 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1667                                     struct list_head *ins_list)
1668 {
1669         struct btrfs_dir_item *di;
1670         struct btrfs_delayed_item *curr, *next;
1671         struct btrfs_key location;
1672         char *name;
1673         int name_len;
1674         int over = 0;
1675         unsigned char d_type;
1676
1677         if (list_empty(ins_list))
1678                 return 0;
1679
1680         /*
1681          * Changing the data of the delayed item is impossible. So
1682          * we needn't lock them. And we have held i_mutex of the
1683          * directory, nobody can delete any directory indexes now.
1684          */
1685         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1686                 list_del(&curr->readdir_list);
1687
1688                 if (curr->key.offset < ctx->pos) {
1689                         if (refcount_dec_and_test(&curr->refs))
1690                                 kfree(curr);
1691                         continue;
1692                 }
1693
1694                 ctx->pos = curr->key.offset;
1695
1696                 di = (struct btrfs_dir_item *)curr->data;
1697                 name = (char *)(di + 1);
1698                 name_len = btrfs_stack_dir_name_len(di);
1699
1700                 d_type = btrfs_filetype_table[di->type];
1701                 btrfs_disk_key_to_cpu(&location, &di->location);
1702
1703                 over = !dir_emit(ctx, name, name_len,
1704                                location.objectid, d_type);
1705
1706                 if (refcount_dec_and_test(&curr->refs))
1707                         kfree(curr);
1708
1709                 if (over)
1710                         return 1;
1711                 ctx->pos++;
1712         }
1713         return 0;
1714 }
1715
1716 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1717                                   struct btrfs_inode_item *inode_item,
1718                                   struct inode *inode)
1719 {
1720         btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1721         btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1722         btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1723         btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1724         btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1725         btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1726         btrfs_set_stack_inode_generation(inode_item,
1727                                          BTRFS_I(inode)->generation);
1728         btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
1729         btrfs_set_stack_inode_transid(inode_item, trans->transid);
1730         btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1731         btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1732         btrfs_set_stack_inode_block_group(inode_item, 0);
1733
1734         btrfs_set_stack_timespec_sec(&inode_item->atime,
1735                                      inode->i_atime.tv_sec);
1736         btrfs_set_stack_timespec_nsec(&inode_item->atime,
1737                                       inode->i_atime.tv_nsec);
1738
1739         btrfs_set_stack_timespec_sec(&inode_item->mtime,
1740                                      inode->i_mtime.tv_sec);
1741         btrfs_set_stack_timespec_nsec(&inode_item->mtime,
1742                                       inode->i_mtime.tv_nsec);
1743
1744         btrfs_set_stack_timespec_sec(&inode_item->ctime,
1745                                      inode->i_ctime.tv_sec);
1746         btrfs_set_stack_timespec_nsec(&inode_item->ctime,
1747                                       inode->i_ctime.tv_nsec);
1748
1749         btrfs_set_stack_timespec_sec(&inode_item->otime,
1750                                      BTRFS_I(inode)->i_otime.tv_sec);
1751         btrfs_set_stack_timespec_nsec(&inode_item->otime,
1752                                      BTRFS_I(inode)->i_otime.tv_nsec);
1753 }
1754
1755 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1756 {
1757         struct btrfs_delayed_node *delayed_node;
1758         struct btrfs_inode_item *inode_item;
1759
1760         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1761         if (!delayed_node)
1762                 return -ENOENT;
1763
1764         mutex_lock(&delayed_node->mutex);
1765         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1766                 mutex_unlock(&delayed_node->mutex);
1767                 btrfs_release_delayed_node(delayed_node);
1768                 return -ENOENT;
1769         }
1770
1771         inode_item = &delayed_node->inode_item;
1772
1773         i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1774         i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1775         btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
1776         inode->i_mode = btrfs_stack_inode_mode(inode_item);
1777         set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1778         inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1779         BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1780         BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1781
1782         inode->i_version = btrfs_stack_inode_sequence(inode_item);
1783         inode->i_rdev = 0;
1784         *rdev = btrfs_stack_inode_rdev(inode_item);
1785         BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1786
1787         inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
1788         inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
1789
1790         inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
1791         inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
1792
1793         inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
1794         inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
1795
1796         BTRFS_I(inode)->i_otime.tv_sec =
1797                 btrfs_stack_timespec_sec(&inode_item->otime);
1798         BTRFS_I(inode)->i_otime.tv_nsec =
1799                 btrfs_stack_timespec_nsec(&inode_item->otime);
1800
1801         inode->i_generation = BTRFS_I(inode)->generation;
1802         BTRFS_I(inode)->index_cnt = (u64)-1;
1803
1804         mutex_unlock(&delayed_node->mutex);
1805         btrfs_release_delayed_node(delayed_node);
1806         return 0;
1807 }
1808
1809 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1810                                struct btrfs_root *root, struct inode *inode)
1811 {
1812         struct btrfs_delayed_node *delayed_node;
1813         int ret = 0;
1814
1815         delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
1816         if (IS_ERR(delayed_node))
1817                 return PTR_ERR(delayed_node);
1818
1819         mutex_lock(&delayed_node->mutex);
1820         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1821                 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1822                 goto release_node;
1823         }
1824
1825         ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
1826                                                    delayed_node);
1827         if (ret)
1828                 goto release_node;
1829
1830         fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1831         set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1832         delayed_node->count++;
1833         atomic_inc(&root->fs_info->delayed_root->items);
1834 release_node:
1835         mutex_unlock(&delayed_node->mutex);
1836         btrfs_release_delayed_node(delayed_node);
1837         return ret;
1838 }
1839
1840 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
1841 {
1842         struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
1843         struct btrfs_delayed_node *delayed_node;
1844
1845         /*
1846          * we don't do delayed inode updates during log recovery because it
1847          * leads to enospc problems.  This means we also can't do
1848          * delayed inode refs
1849          */
1850         if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
1851                 return -EAGAIN;
1852
1853         delayed_node = btrfs_get_or_create_delayed_node(inode);
1854         if (IS_ERR(delayed_node))
1855                 return PTR_ERR(delayed_node);
1856
1857         /*
1858          * We don't reserve space for inode ref deletion is because:
1859          * - We ONLY do async inode ref deletion for the inode who has only
1860          *   one link(i_nlink == 1), it means there is only one inode ref.
1861          *   And in most case, the inode ref and the inode item are in the
1862          *   same leaf, and we will deal with them at the same time.
1863          *   Since we are sure we will reserve the space for the inode item,
1864          *   it is unnecessary to reserve space for inode ref deletion.
1865          * - If the inode ref and the inode item are not in the same leaf,
1866          *   We also needn't worry about enospc problem, because we reserve
1867          *   much more space for the inode update than it needs.
1868          * - At the worst, we can steal some space from the global reservation.
1869          *   It is very rare.
1870          */
1871         mutex_lock(&delayed_node->mutex);
1872         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1873                 goto release_node;
1874
1875         set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1876         delayed_node->count++;
1877         atomic_inc(&fs_info->delayed_root->items);
1878 release_node:
1879         mutex_unlock(&delayed_node->mutex);
1880         btrfs_release_delayed_node(delayed_node);
1881         return 0;
1882 }
1883
1884 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1885 {
1886         struct btrfs_root *root = delayed_node->root;
1887         struct btrfs_fs_info *fs_info = root->fs_info;
1888         struct btrfs_delayed_item *curr_item, *prev_item;
1889
1890         mutex_lock(&delayed_node->mutex);
1891         curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1892         while (curr_item) {
1893                 btrfs_delayed_item_release_metadata(fs_info, curr_item);
1894                 prev_item = curr_item;
1895                 curr_item = __btrfs_next_delayed_item(prev_item);
1896                 btrfs_release_delayed_item(prev_item);
1897         }
1898
1899         curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1900         while (curr_item) {
1901                 btrfs_delayed_item_release_metadata(fs_info, curr_item);
1902                 prev_item = curr_item;
1903                 curr_item = __btrfs_next_delayed_item(prev_item);
1904                 btrfs_release_delayed_item(prev_item);
1905         }
1906
1907         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1908                 btrfs_release_delayed_iref(delayed_node);
1909
1910         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1911                 btrfs_delayed_inode_release_metadata(fs_info, delayed_node);
1912                 btrfs_release_delayed_inode(delayed_node);
1913         }
1914         mutex_unlock(&delayed_node->mutex);
1915 }
1916
1917 void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
1918 {
1919         struct btrfs_delayed_node *delayed_node;
1920
1921         delayed_node = btrfs_get_delayed_node(inode);
1922         if (!delayed_node)
1923                 return;
1924
1925         __btrfs_kill_delayed_node(delayed_node);
1926         btrfs_release_delayed_node(delayed_node);
1927 }
1928
1929 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1930 {
1931         u64 inode_id = 0;
1932         struct btrfs_delayed_node *delayed_nodes[8];
1933         int i, n;
1934
1935         while (1) {
1936                 spin_lock(&root->inode_lock);
1937                 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1938                                            (void **)delayed_nodes, inode_id,
1939                                            ARRAY_SIZE(delayed_nodes));
1940                 if (!n) {
1941                         spin_unlock(&root->inode_lock);
1942                         break;
1943                 }
1944
1945                 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1946
1947                 for (i = 0; i < n; i++)
1948                         refcount_inc(&delayed_nodes[i]->refs);
1949                 spin_unlock(&root->inode_lock);
1950
1951                 for (i = 0; i < n; i++) {
1952                         __btrfs_kill_delayed_node(delayed_nodes[i]);
1953                         btrfs_release_delayed_node(delayed_nodes[i]);
1954                 }
1955         }
1956 }
1957
1958 void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
1959 {
1960         struct btrfs_delayed_node *curr_node, *prev_node;
1961
1962         curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
1963         while (curr_node) {
1964                 __btrfs_kill_delayed_node(curr_node);
1965
1966                 prev_node = curr_node;
1967                 curr_node = btrfs_next_delayed_node(curr_node);
1968                 btrfs_release_delayed_node(prev_node);
1969         }
1970 }
1971