Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
[sfrench/cifs-2.6.git] / fs / btrfs / delayed-inode.c
1 /*
2  * Copyright (C) 2011 Fujitsu.  All rights reserved.
3  * Written by Miao Xie <miaox@cn.fujitsu.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public
7  * License v2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public
15  * License along with this program; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 021110-1307, USA.
18  */
19
20 #include <linux/slab.h>
21 #include "delayed-inode.h"
22 #include "disk-io.h"
23 #include "transaction.h"
24 #include "ctree.h"
25
26 #define BTRFS_DELAYED_WRITEBACK         512
27 #define BTRFS_DELAYED_BACKGROUND        128
28 #define BTRFS_DELAYED_BATCH             16
29
30 static struct kmem_cache *delayed_node_cache;
31
32 int __init btrfs_delayed_inode_init(void)
33 {
34         delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
35                                         sizeof(struct btrfs_delayed_node),
36                                         0,
37                                         SLAB_MEM_SPREAD,
38                                         NULL);
39         if (!delayed_node_cache)
40                 return -ENOMEM;
41         return 0;
42 }
43
44 void btrfs_delayed_inode_exit(void)
45 {
46         kmem_cache_destroy(delayed_node_cache);
47 }
48
49 static inline void btrfs_init_delayed_node(
50                                 struct btrfs_delayed_node *delayed_node,
51                                 struct btrfs_root *root, u64 inode_id)
52 {
53         delayed_node->root = root;
54         delayed_node->inode_id = inode_id;
55         refcount_set(&delayed_node->refs, 0);
56         delayed_node->ins_root = RB_ROOT;
57         delayed_node->del_root = RB_ROOT;
58         mutex_init(&delayed_node->mutex);
59         INIT_LIST_HEAD(&delayed_node->n_list);
60         INIT_LIST_HEAD(&delayed_node->p_list);
61 }
62
63 static inline int btrfs_is_continuous_delayed_item(
64                                         struct btrfs_delayed_item *item1,
65                                         struct btrfs_delayed_item *item2)
66 {
67         if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
68             item1->key.objectid == item2->key.objectid &&
69             item1->key.type == item2->key.type &&
70             item1->key.offset + 1 == item2->key.offset)
71                 return 1;
72         return 0;
73 }
74
75 static struct btrfs_delayed_node *btrfs_get_delayed_node(
76                 struct btrfs_inode *btrfs_inode)
77 {
78         struct btrfs_root *root = btrfs_inode->root;
79         u64 ino = btrfs_ino(btrfs_inode);
80         struct btrfs_delayed_node *node;
81
82         node = READ_ONCE(btrfs_inode->delayed_node);
83         if (node) {
84                 refcount_inc(&node->refs);
85                 return node;
86         }
87
88         spin_lock(&root->inode_lock);
89         node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
90
91         if (node) {
92                 if (btrfs_inode->delayed_node) {
93                         refcount_inc(&node->refs);      /* can be accessed */
94                         BUG_ON(btrfs_inode->delayed_node != node);
95                         spin_unlock(&root->inode_lock);
96                         return node;
97                 }
98
99                 /*
100                  * It's possible that we're racing into the middle of removing
101                  * this node from the radix tree.  In this case, the refcount
102                  * was zero and it should never go back to one.  Just return
103                  * NULL like it was never in the radix at all; our release
104                  * function is in the process of removing it.
105                  *
106                  * Some implementations of refcount_inc refuse to bump the
107                  * refcount once it has hit zero.  If we don't do this dance
108                  * here, refcount_inc() may decide to just WARN_ONCE() instead
109                  * of actually bumping the refcount.
110                  *
111                  * If this node is properly in the radix, we want to bump the
112                  * refcount twice, once for the inode and once for this get
113                  * operation.
114                  */
115                 if (refcount_inc_not_zero(&node->refs)) {
116                         refcount_inc(&node->refs);
117                         btrfs_inode->delayed_node = node;
118                 } else {
119                         node = NULL;
120                 }
121
122                 spin_unlock(&root->inode_lock);
123                 return node;
124         }
125         spin_unlock(&root->inode_lock);
126
127         return NULL;
128 }
129
130 /* Will return either the node or PTR_ERR(-ENOMEM) */
131 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
132                 struct btrfs_inode *btrfs_inode)
133 {
134         struct btrfs_delayed_node *node;
135         struct btrfs_root *root = btrfs_inode->root;
136         u64 ino = btrfs_ino(btrfs_inode);
137         int ret;
138
139 again:
140         node = btrfs_get_delayed_node(btrfs_inode);
141         if (node)
142                 return node;
143
144         node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
145         if (!node)
146                 return ERR_PTR(-ENOMEM);
147         btrfs_init_delayed_node(node, root, ino);
148
149         /* cached in the btrfs inode and can be accessed */
150         refcount_set(&node->refs, 2);
151
152         ret = radix_tree_preload(GFP_NOFS);
153         if (ret) {
154                 kmem_cache_free(delayed_node_cache, node);
155                 return ERR_PTR(ret);
156         }
157
158         spin_lock(&root->inode_lock);
159         ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
160         if (ret == -EEXIST) {
161                 spin_unlock(&root->inode_lock);
162                 kmem_cache_free(delayed_node_cache, node);
163                 radix_tree_preload_end();
164                 goto again;
165         }
166         btrfs_inode->delayed_node = node;
167         spin_unlock(&root->inode_lock);
168         radix_tree_preload_end();
169
170         return node;
171 }
172
173 /*
174  * Call it when holding delayed_node->mutex
175  *
176  * If mod = 1, add this node into the prepared list.
177  */
178 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
179                                      struct btrfs_delayed_node *node,
180                                      int mod)
181 {
182         spin_lock(&root->lock);
183         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
184                 if (!list_empty(&node->p_list))
185                         list_move_tail(&node->p_list, &root->prepare_list);
186                 else if (mod)
187                         list_add_tail(&node->p_list, &root->prepare_list);
188         } else {
189                 list_add_tail(&node->n_list, &root->node_list);
190                 list_add_tail(&node->p_list, &root->prepare_list);
191                 refcount_inc(&node->refs);      /* inserted into list */
192                 root->nodes++;
193                 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
194         }
195         spin_unlock(&root->lock);
196 }
197
198 /* Call it when holding delayed_node->mutex */
199 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
200                                        struct btrfs_delayed_node *node)
201 {
202         spin_lock(&root->lock);
203         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
204                 root->nodes--;
205                 refcount_dec(&node->refs);      /* not in the list */
206                 list_del_init(&node->n_list);
207                 if (!list_empty(&node->p_list))
208                         list_del_init(&node->p_list);
209                 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
210         }
211         spin_unlock(&root->lock);
212 }
213
214 static struct btrfs_delayed_node *btrfs_first_delayed_node(
215                         struct btrfs_delayed_root *delayed_root)
216 {
217         struct list_head *p;
218         struct btrfs_delayed_node *node = NULL;
219
220         spin_lock(&delayed_root->lock);
221         if (list_empty(&delayed_root->node_list))
222                 goto out;
223
224         p = delayed_root->node_list.next;
225         node = list_entry(p, struct btrfs_delayed_node, n_list);
226         refcount_inc(&node->refs);
227 out:
228         spin_unlock(&delayed_root->lock);
229
230         return node;
231 }
232
233 static struct btrfs_delayed_node *btrfs_next_delayed_node(
234                                                 struct btrfs_delayed_node *node)
235 {
236         struct btrfs_delayed_root *delayed_root;
237         struct list_head *p;
238         struct btrfs_delayed_node *next = NULL;
239
240         delayed_root = node->root->fs_info->delayed_root;
241         spin_lock(&delayed_root->lock);
242         if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
243                 /* not in the list */
244                 if (list_empty(&delayed_root->node_list))
245                         goto out;
246                 p = delayed_root->node_list.next;
247         } else if (list_is_last(&node->n_list, &delayed_root->node_list))
248                 goto out;
249         else
250                 p = node->n_list.next;
251
252         next = list_entry(p, struct btrfs_delayed_node, n_list);
253         refcount_inc(&next->refs);
254 out:
255         spin_unlock(&delayed_root->lock);
256
257         return next;
258 }
259
260 static void __btrfs_release_delayed_node(
261                                 struct btrfs_delayed_node *delayed_node,
262                                 int mod)
263 {
264         struct btrfs_delayed_root *delayed_root;
265
266         if (!delayed_node)
267                 return;
268
269         delayed_root = delayed_node->root->fs_info->delayed_root;
270
271         mutex_lock(&delayed_node->mutex);
272         if (delayed_node->count)
273                 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
274         else
275                 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
276         mutex_unlock(&delayed_node->mutex);
277
278         if (refcount_dec_and_test(&delayed_node->refs)) {
279                 struct btrfs_root *root = delayed_node->root;
280
281                 spin_lock(&root->inode_lock);
282                 /*
283                  * Once our refcount goes to zero, nobody is allowed to bump it
284                  * back up.  We can delete it now.
285                  */
286                 ASSERT(refcount_read(&delayed_node->refs) == 0);
287                 radix_tree_delete(&root->delayed_nodes_tree,
288                                   delayed_node->inode_id);
289                 spin_unlock(&root->inode_lock);
290                 kmem_cache_free(delayed_node_cache, delayed_node);
291         }
292 }
293
294 static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
295 {
296         __btrfs_release_delayed_node(node, 0);
297 }
298
299 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
300                                         struct btrfs_delayed_root *delayed_root)
301 {
302         struct list_head *p;
303         struct btrfs_delayed_node *node = NULL;
304
305         spin_lock(&delayed_root->lock);
306         if (list_empty(&delayed_root->prepare_list))
307                 goto out;
308
309         p = delayed_root->prepare_list.next;
310         list_del_init(p);
311         node = list_entry(p, struct btrfs_delayed_node, p_list);
312         refcount_inc(&node->refs);
313 out:
314         spin_unlock(&delayed_root->lock);
315
316         return node;
317 }
318
319 static inline void btrfs_release_prepared_delayed_node(
320                                         struct btrfs_delayed_node *node)
321 {
322         __btrfs_release_delayed_node(node, 1);
323 }
324
325 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
326 {
327         struct btrfs_delayed_item *item;
328         item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
329         if (item) {
330                 item->data_len = data_len;
331                 item->ins_or_del = 0;
332                 item->bytes_reserved = 0;
333                 item->delayed_node = NULL;
334                 refcount_set(&item->refs, 1);
335         }
336         return item;
337 }
338
339 /*
340  * __btrfs_lookup_delayed_item - look up the delayed item by key
341  * @delayed_node: pointer to the delayed node
342  * @key:          the key to look up
343  * @prev:         used to store the prev item if the right item isn't found
344  * @next:         used to store the next item if the right item isn't found
345  *
346  * Note: if we don't find the right item, we will return the prev item and
347  * the next item.
348  */
349 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
350                                 struct rb_root *root,
351                                 struct btrfs_key *key,
352                                 struct btrfs_delayed_item **prev,
353                                 struct btrfs_delayed_item **next)
354 {
355         struct rb_node *node, *prev_node = NULL;
356         struct btrfs_delayed_item *delayed_item = NULL;
357         int ret = 0;
358
359         node = root->rb_node;
360
361         while (node) {
362                 delayed_item = rb_entry(node, struct btrfs_delayed_item,
363                                         rb_node);
364                 prev_node = node;
365                 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
366                 if (ret < 0)
367                         node = node->rb_right;
368                 else if (ret > 0)
369                         node = node->rb_left;
370                 else
371                         return delayed_item;
372         }
373
374         if (prev) {
375                 if (!prev_node)
376                         *prev = NULL;
377                 else if (ret < 0)
378                         *prev = delayed_item;
379                 else if ((node = rb_prev(prev_node)) != NULL) {
380                         *prev = rb_entry(node, struct btrfs_delayed_item,
381                                          rb_node);
382                 } else
383                         *prev = NULL;
384         }
385
386         if (next) {
387                 if (!prev_node)
388                         *next = NULL;
389                 else if (ret > 0)
390                         *next = delayed_item;
391                 else if ((node = rb_next(prev_node)) != NULL) {
392                         *next = rb_entry(node, struct btrfs_delayed_item,
393                                          rb_node);
394                 } else
395                         *next = NULL;
396         }
397         return NULL;
398 }
399
400 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
401                                         struct btrfs_delayed_node *delayed_node,
402                                         struct btrfs_key *key)
403 {
404         return __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
405                                            NULL, NULL);
406 }
407
408 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
409                                     struct btrfs_delayed_item *ins,
410                                     int action)
411 {
412         struct rb_node **p, *node;
413         struct rb_node *parent_node = NULL;
414         struct rb_root *root;
415         struct btrfs_delayed_item *item;
416         int cmp;
417
418         if (action == BTRFS_DELAYED_INSERTION_ITEM)
419                 root = &delayed_node->ins_root;
420         else if (action == BTRFS_DELAYED_DELETION_ITEM)
421                 root = &delayed_node->del_root;
422         else
423                 BUG();
424         p = &root->rb_node;
425         node = &ins->rb_node;
426
427         while (*p) {
428                 parent_node = *p;
429                 item = rb_entry(parent_node, struct btrfs_delayed_item,
430                                  rb_node);
431
432                 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
433                 if (cmp < 0)
434                         p = &(*p)->rb_right;
435                 else if (cmp > 0)
436                         p = &(*p)->rb_left;
437                 else
438                         return -EEXIST;
439         }
440
441         rb_link_node(node, parent_node, p);
442         rb_insert_color(node, root);
443         ins->delayed_node = delayed_node;
444         ins->ins_or_del = action;
445
446         if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
447             action == BTRFS_DELAYED_INSERTION_ITEM &&
448             ins->key.offset >= delayed_node->index_cnt)
449                         delayed_node->index_cnt = ins->key.offset + 1;
450
451         delayed_node->count++;
452         atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
453         return 0;
454 }
455
456 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
457                                               struct btrfs_delayed_item *item)
458 {
459         return __btrfs_add_delayed_item(node, item,
460                                         BTRFS_DELAYED_INSERTION_ITEM);
461 }
462
463 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
464                                              struct btrfs_delayed_item *item)
465 {
466         return __btrfs_add_delayed_item(node, item,
467                                         BTRFS_DELAYED_DELETION_ITEM);
468 }
469
470 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
471 {
472         int seq = atomic_inc_return(&delayed_root->items_seq);
473
474         /*
475          * atomic_dec_return implies a barrier for waitqueue_active
476          */
477         if ((atomic_dec_return(&delayed_root->items) <
478             BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
479             waitqueue_active(&delayed_root->wait))
480                 wake_up(&delayed_root->wait);
481 }
482
483 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
484 {
485         struct rb_root *root;
486         struct btrfs_delayed_root *delayed_root;
487
488         delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
489
490         BUG_ON(!delayed_root);
491         BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
492                delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
493
494         if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
495                 root = &delayed_item->delayed_node->ins_root;
496         else
497                 root = &delayed_item->delayed_node->del_root;
498
499         rb_erase(&delayed_item->rb_node, root);
500         delayed_item->delayed_node->count--;
501
502         finish_one_item(delayed_root);
503 }
504
505 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
506 {
507         if (item) {
508                 __btrfs_remove_delayed_item(item);
509                 if (refcount_dec_and_test(&item->refs))
510                         kfree(item);
511         }
512 }
513
514 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
515                                         struct btrfs_delayed_node *delayed_node)
516 {
517         struct rb_node *p;
518         struct btrfs_delayed_item *item = NULL;
519
520         p = rb_first(&delayed_node->ins_root);
521         if (p)
522                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
523
524         return item;
525 }
526
527 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
528                                         struct btrfs_delayed_node *delayed_node)
529 {
530         struct rb_node *p;
531         struct btrfs_delayed_item *item = NULL;
532
533         p = rb_first(&delayed_node->del_root);
534         if (p)
535                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
536
537         return item;
538 }
539
540 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
541                                                 struct btrfs_delayed_item *item)
542 {
543         struct rb_node *p;
544         struct btrfs_delayed_item *next = NULL;
545
546         p = rb_next(&item->rb_node);
547         if (p)
548                 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
549
550         return next;
551 }
552
553 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
554                                                struct btrfs_fs_info *fs_info,
555                                                struct btrfs_delayed_item *item)
556 {
557         struct btrfs_block_rsv *src_rsv;
558         struct btrfs_block_rsv *dst_rsv;
559         u64 num_bytes;
560         int ret;
561
562         if (!trans->bytes_reserved)
563                 return 0;
564
565         src_rsv = trans->block_rsv;
566         dst_rsv = &fs_info->delayed_block_rsv;
567
568         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
569         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
570         if (!ret) {
571                 trace_btrfs_space_reservation(fs_info, "delayed_item",
572                                               item->key.objectid,
573                                               num_bytes, 1);
574                 item->bytes_reserved = num_bytes;
575         }
576
577         return ret;
578 }
579
580 static void btrfs_delayed_item_release_metadata(struct btrfs_fs_info *fs_info,
581                                                 struct btrfs_delayed_item *item)
582 {
583         struct btrfs_block_rsv *rsv;
584
585         if (!item->bytes_reserved)
586                 return;
587
588         rsv = &fs_info->delayed_block_rsv;
589         trace_btrfs_space_reservation(fs_info, "delayed_item",
590                                       item->key.objectid, item->bytes_reserved,
591                                       0);
592         btrfs_block_rsv_release(fs_info, rsv,
593                                 item->bytes_reserved);
594 }
595
596 static int btrfs_delayed_inode_reserve_metadata(
597                                         struct btrfs_trans_handle *trans,
598                                         struct btrfs_root *root,
599                                         struct btrfs_inode *inode,
600                                         struct btrfs_delayed_node *node)
601 {
602         struct btrfs_fs_info *fs_info = root->fs_info;
603         struct btrfs_block_rsv *src_rsv;
604         struct btrfs_block_rsv *dst_rsv;
605         u64 num_bytes;
606         int ret;
607
608         src_rsv = trans->block_rsv;
609         dst_rsv = &fs_info->delayed_block_rsv;
610
611         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
612
613         /*
614          * btrfs_dirty_inode will update the inode under btrfs_join_transaction
615          * which doesn't reserve space for speed.  This is a problem since we
616          * still need to reserve space for this update, so try to reserve the
617          * space.
618          *
619          * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
620          * we always reserve enough to update the inode item.
621          */
622         if (!src_rsv || (!trans->bytes_reserved &&
623                          src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
624                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
625                                           BTRFS_RESERVE_NO_FLUSH);
626                 /*
627                  * Since we're under a transaction reserve_metadata_bytes could
628                  * try to commit the transaction which will make it return
629                  * EAGAIN to make us stop the transaction we have, so return
630                  * ENOSPC instead so that btrfs_dirty_inode knows what to do.
631                  */
632                 if (ret == -EAGAIN)
633                         ret = -ENOSPC;
634                 if (!ret) {
635                         node->bytes_reserved = num_bytes;
636                         trace_btrfs_space_reservation(fs_info,
637                                                       "delayed_inode",
638                                                       btrfs_ino(inode),
639                                                       num_bytes, 1);
640                 }
641                 return ret;
642         }
643
644         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
645         if (!ret) {
646                 trace_btrfs_space_reservation(fs_info, "delayed_inode",
647                                               btrfs_ino(inode), num_bytes, 1);
648                 node->bytes_reserved = num_bytes;
649         }
650
651         return ret;
652 }
653
654 static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
655                                                 struct btrfs_delayed_node *node)
656 {
657         struct btrfs_block_rsv *rsv;
658
659         if (!node->bytes_reserved)
660                 return;
661
662         rsv = &fs_info->delayed_block_rsv;
663         trace_btrfs_space_reservation(fs_info, "delayed_inode",
664                                       node->inode_id, node->bytes_reserved, 0);
665         btrfs_block_rsv_release(fs_info, rsv,
666                                 node->bytes_reserved);
667         node->bytes_reserved = 0;
668 }
669
670 /*
671  * This helper will insert some continuous items into the same leaf according
672  * to the free space of the leaf.
673  */
674 static int btrfs_batch_insert_items(struct btrfs_root *root,
675                                     struct btrfs_path *path,
676                                     struct btrfs_delayed_item *item)
677 {
678         struct btrfs_fs_info *fs_info = root->fs_info;
679         struct btrfs_delayed_item *curr, *next;
680         int free_space;
681         int total_data_size = 0, total_size = 0;
682         struct extent_buffer *leaf;
683         char *data_ptr;
684         struct btrfs_key *keys;
685         u32 *data_size;
686         struct list_head head;
687         int slot;
688         int nitems;
689         int i;
690         int ret = 0;
691
692         BUG_ON(!path->nodes[0]);
693
694         leaf = path->nodes[0];
695         free_space = btrfs_leaf_free_space(fs_info, leaf);
696         INIT_LIST_HEAD(&head);
697
698         next = item;
699         nitems = 0;
700
701         /*
702          * count the number of the continuous items that we can insert in batch
703          */
704         while (total_size + next->data_len + sizeof(struct btrfs_item) <=
705                free_space) {
706                 total_data_size += next->data_len;
707                 total_size += next->data_len + sizeof(struct btrfs_item);
708                 list_add_tail(&next->tree_list, &head);
709                 nitems++;
710
711                 curr = next;
712                 next = __btrfs_next_delayed_item(curr);
713                 if (!next)
714                         break;
715
716                 if (!btrfs_is_continuous_delayed_item(curr, next))
717                         break;
718         }
719
720         if (!nitems) {
721                 ret = 0;
722                 goto out;
723         }
724
725         /*
726          * we need allocate some memory space, but it might cause the task
727          * to sleep, so we set all locked nodes in the path to blocking locks
728          * first.
729          */
730         btrfs_set_path_blocking(path);
731
732         keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
733         if (!keys) {
734                 ret = -ENOMEM;
735                 goto out;
736         }
737
738         data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
739         if (!data_size) {
740                 ret = -ENOMEM;
741                 goto error;
742         }
743
744         /* get keys of all the delayed items */
745         i = 0;
746         list_for_each_entry(next, &head, tree_list) {
747                 keys[i] = next->key;
748                 data_size[i] = next->data_len;
749                 i++;
750         }
751
752         /* reset all the locked nodes in the patch to spinning locks. */
753         btrfs_clear_path_blocking(path, NULL, 0);
754
755         /* insert the keys of the items */
756         setup_items_for_insert(root, path, keys, data_size,
757                                total_data_size, total_size, nitems);
758
759         /* insert the dir index items */
760         slot = path->slots[0];
761         list_for_each_entry_safe(curr, next, &head, tree_list) {
762                 data_ptr = btrfs_item_ptr(leaf, slot, char);
763                 write_extent_buffer(leaf, &curr->data,
764                                     (unsigned long)data_ptr,
765                                     curr->data_len);
766                 slot++;
767
768                 btrfs_delayed_item_release_metadata(fs_info, curr);
769
770                 list_del(&curr->tree_list);
771                 btrfs_release_delayed_item(curr);
772         }
773
774 error:
775         kfree(data_size);
776         kfree(keys);
777 out:
778         return ret;
779 }
780
781 /*
782  * This helper can just do simple insertion that needn't extend item for new
783  * data, such as directory name index insertion, inode insertion.
784  */
785 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
786                                      struct btrfs_root *root,
787                                      struct btrfs_path *path,
788                                      struct btrfs_delayed_item *delayed_item)
789 {
790         struct btrfs_fs_info *fs_info = root->fs_info;
791         struct extent_buffer *leaf;
792         char *ptr;
793         int ret;
794
795         ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
796                                       delayed_item->data_len);
797         if (ret < 0 && ret != -EEXIST)
798                 return ret;
799
800         leaf = path->nodes[0];
801
802         ptr = btrfs_item_ptr(leaf, path->slots[0], char);
803
804         write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
805                             delayed_item->data_len);
806         btrfs_mark_buffer_dirty(leaf);
807
808         btrfs_delayed_item_release_metadata(fs_info, delayed_item);
809         return 0;
810 }
811
812 /*
813  * we insert an item first, then if there are some continuous items, we try
814  * to insert those items into the same leaf.
815  */
816 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
817                                       struct btrfs_path *path,
818                                       struct btrfs_root *root,
819                                       struct btrfs_delayed_node *node)
820 {
821         struct btrfs_delayed_item *curr, *prev;
822         int ret = 0;
823
824 do_again:
825         mutex_lock(&node->mutex);
826         curr = __btrfs_first_delayed_insertion_item(node);
827         if (!curr)
828                 goto insert_end;
829
830         ret = btrfs_insert_delayed_item(trans, root, path, curr);
831         if (ret < 0) {
832                 btrfs_release_path(path);
833                 goto insert_end;
834         }
835
836         prev = curr;
837         curr = __btrfs_next_delayed_item(prev);
838         if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
839                 /* insert the continuous items into the same leaf */
840                 path->slots[0]++;
841                 btrfs_batch_insert_items(root, path, curr);
842         }
843         btrfs_release_delayed_item(prev);
844         btrfs_mark_buffer_dirty(path->nodes[0]);
845
846         btrfs_release_path(path);
847         mutex_unlock(&node->mutex);
848         goto do_again;
849
850 insert_end:
851         mutex_unlock(&node->mutex);
852         return ret;
853 }
854
855 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
856                                     struct btrfs_root *root,
857                                     struct btrfs_path *path,
858                                     struct btrfs_delayed_item *item)
859 {
860         struct btrfs_fs_info *fs_info = root->fs_info;
861         struct btrfs_delayed_item *curr, *next;
862         struct extent_buffer *leaf;
863         struct btrfs_key key;
864         struct list_head head;
865         int nitems, i, last_item;
866         int ret = 0;
867
868         BUG_ON(!path->nodes[0]);
869
870         leaf = path->nodes[0];
871
872         i = path->slots[0];
873         last_item = btrfs_header_nritems(leaf) - 1;
874         if (i > last_item)
875                 return -ENOENT; /* FIXME: Is errno suitable? */
876
877         next = item;
878         INIT_LIST_HEAD(&head);
879         btrfs_item_key_to_cpu(leaf, &key, i);
880         nitems = 0;
881         /*
882          * count the number of the dir index items that we can delete in batch
883          */
884         while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
885                 list_add_tail(&next->tree_list, &head);
886                 nitems++;
887
888                 curr = next;
889                 next = __btrfs_next_delayed_item(curr);
890                 if (!next)
891                         break;
892
893                 if (!btrfs_is_continuous_delayed_item(curr, next))
894                         break;
895
896                 i++;
897                 if (i > last_item)
898                         break;
899                 btrfs_item_key_to_cpu(leaf, &key, i);
900         }
901
902         if (!nitems)
903                 return 0;
904
905         ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
906         if (ret)
907                 goto out;
908
909         list_for_each_entry_safe(curr, next, &head, tree_list) {
910                 btrfs_delayed_item_release_metadata(fs_info, curr);
911                 list_del(&curr->tree_list);
912                 btrfs_release_delayed_item(curr);
913         }
914
915 out:
916         return ret;
917 }
918
919 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
920                                       struct btrfs_path *path,
921                                       struct btrfs_root *root,
922                                       struct btrfs_delayed_node *node)
923 {
924         struct btrfs_delayed_item *curr, *prev;
925         int ret = 0;
926
927 do_again:
928         mutex_lock(&node->mutex);
929         curr = __btrfs_first_delayed_deletion_item(node);
930         if (!curr)
931                 goto delete_fail;
932
933         ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
934         if (ret < 0)
935                 goto delete_fail;
936         else if (ret > 0) {
937                 /*
938                  * can't find the item which the node points to, so this node
939                  * is invalid, just drop it.
940                  */
941                 prev = curr;
942                 curr = __btrfs_next_delayed_item(prev);
943                 btrfs_release_delayed_item(prev);
944                 ret = 0;
945                 btrfs_release_path(path);
946                 if (curr) {
947                         mutex_unlock(&node->mutex);
948                         goto do_again;
949                 } else
950                         goto delete_fail;
951         }
952
953         btrfs_batch_delete_items(trans, root, path, curr);
954         btrfs_release_path(path);
955         mutex_unlock(&node->mutex);
956         goto do_again;
957
958 delete_fail:
959         btrfs_release_path(path);
960         mutex_unlock(&node->mutex);
961         return ret;
962 }
963
964 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
965 {
966         struct btrfs_delayed_root *delayed_root;
967
968         if (delayed_node &&
969             test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
970                 BUG_ON(!delayed_node->root);
971                 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
972                 delayed_node->count--;
973
974                 delayed_root = delayed_node->root->fs_info->delayed_root;
975                 finish_one_item(delayed_root);
976         }
977 }
978
979 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
980 {
981         struct btrfs_delayed_root *delayed_root;
982
983         ASSERT(delayed_node->root);
984         clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
985         delayed_node->count--;
986
987         delayed_root = delayed_node->root->fs_info->delayed_root;
988         finish_one_item(delayed_root);
989 }
990
991 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
992                                         struct btrfs_root *root,
993                                         struct btrfs_path *path,
994                                         struct btrfs_delayed_node *node)
995 {
996         struct btrfs_fs_info *fs_info = root->fs_info;
997         struct btrfs_key key;
998         struct btrfs_inode_item *inode_item;
999         struct extent_buffer *leaf;
1000         int mod;
1001         int ret;
1002
1003         key.objectid = node->inode_id;
1004         key.type = BTRFS_INODE_ITEM_KEY;
1005         key.offset = 0;
1006
1007         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1008                 mod = -1;
1009         else
1010                 mod = 1;
1011
1012         ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1013         if (ret > 0) {
1014                 btrfs_release_path(path);
1015                 return -ENOENT;
1016         } else if (ret < 0) {
1017                 return ret;
1018         }
1019
1020         leaf = path->nodes[0];
1021         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1022                                     struct btrfs_inode_item);
1023         write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1024                             sizeof(struct btrfs_inode_item));
1025         btrfs_mark_buffer_dirty(leaf);
1026
1027         if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1028                 goto no_iref;
1029
1030         path->slots[0]++;
1031         if (path->slots[0] >= btrfs_header_nritems(leaf))
1032                 goto search;
1033 again:
1034         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1035         if (key.objectid != node->inode_id)
1036                 goto out;
1037
1038         if (key.type != BTRFS_INODE_REF_KEY &&
1039             key.type != BTRFS_INODE_EXTREF_KEY)
1040                 goto out;
1041
1042         /*
1043          * Delayed iref deletion is for the inode who has only one link,
1044          * so there is only one iref. The case that several irefs are
1045          * in the same item doesn't exist.
1046          */
1047         btrfs_del_item(trans, root, path);
1048 out:
1049         btrfs_release_delayed_iref(node);
1050 no_iref:
1051         btrfs_release_path(path);
1052 err_out:
1053         btrfs_delayed_inode_release_metadata(fs_info, node);
1054         btrfs_release_delayed_inode(node);
1055
1056         return ret;
1057
1058 search:
1059         btrfs_release_path(path);
1060
1061         key.type = BTRFS_INODE_EXTREF_KEY;
1062         key.offset = -1;
1063         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1064         if (ret < 0)
1065                 goto err_out;
1066         ASSERT(ret);
1067
1068         ret = 0;
1069         leaf = path->nodes[0];
1070         path->slots[0]--;
1071         goto again;
1072 }
1073
1074 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1075                                              struct btrfs_root *root,
1076                                              struct btrfs_path *path,
1077                                              struct btrfs_delayed_node *node)
1078 {
1079         int ret;
1080
1081         mutex_lock(&node->mutex);
1082         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1083                 mutex_unlock(&node->mutex);
1084                 return 0;
1085         }
1086
1087         ret = __btrfs_update_delayed_inode(trans, root, path, node);
1088         mutex_unlock(&node->mutex);
1089         return ret;
1090 }
1091
1092 static inline int
1093 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1094                                    struct btrfs_path *path,
1095                                    struct btrfs_delayed_node *node)
1096 {
1097         int ret;
1098
1099         ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1100         if (ret)
1101                 return ret;
1102
1103         ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1104         if (ret)
1105                 return ret;
1106
1107         ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1108         return ret;
1109 }
1110
1111 /*
1112  * Called when committing the transaction.
1113  * Returns 0 on success.
1114  * Returns < 0 on error and returns with an aborted transaction with any
1115  * outstanding delayed items cleaned up.
1116  */
1117 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1118                                      struct btrfs_fs_info *fs_info, int nr)
1119 {
1120         struct btrfs_delayed_root *delayed_root;
1121         struct btrfs_delayed_node *curr_node, *prev_node;
1122         struct btrfs_path *path;
1123         struct btrfs_block_rsv *block_rsv;
1124         int ret = 0;
1125         bool count = (nr > 0);
1126
1127         if (trans->aborted)
1128                 return -EIO;
1129
1130         path = btrfs_alloc_path();
1131         if (!path)
1132                 return -ENOMEM;
1133         path->leave_spinning = 1;
1134
1135         block_rsv = trans->block_rsv;
1136         trans->block_rsv = &fs_info->delayed_block_rsv;
1137
1138         delayed_root = fs_info->delayed_root;
1139
1140         curr_node = btrfs_first_delayed_node(delayed_root);
1141         while (curr_node && (!count || (count && nr--))) {
1142                 ret = __btrfs_commit_inode_delayed_items(trans, path,
1143                                                          curr_node);
1144                 if (ret) {
1145                         btrfs_release_delayed_node(curr_node);
1146                         curr_node = NULL;
1147                         btrfs_abort_transaction(trans, ret);
1148                         break;
1149                 }
1150
1151                 prev_node = curr_node;
1152                 curr_node = btrfs_next_delayed_node(curr_node);
1153                 btrfs_release_delayed_node(prev_node);
1154         }
1155
1156         if (curr_node)
1157                 btrfs_release_delayed_node(curr_node);
1158         btrfs_free_path(path);
1159         trans->block_rsv = block_rsv;
1160
1161         return ret;
1162 }
1163
1164 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1165                             struct btrfs_fs_info *fs_info)
1166 {
1167         return __btrfs_run_delayed_items(trans, fs_info, -1);
1168 }
1169
1170 int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
1171                                struct btrfs_fs_info *fs_info, int nr)
1172 {
1173         return __btrfs_run_delayed_items(trans, fs_info, nr);
1174 }
1175
1176 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1177                                      struct btrfs_inode *inode)
1178 {
1179         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1180         struct btrfs_path *path;
1181         struct btrfs_block_rsv *block_rsv;
1182         int ret;
1183
1184         if (!delayed_node)
1185                 return 0;
1186
1187         mutex_lock(&delayed_node->mutex);
1188         if (!delayed_node->count) {
1189                 mutex_unlock(&delayed_node->mutex);
1190                 btrfs_release_delayed_node(delayed_node);
1191                 return 0;
1192         }
1193         mutex_unlock(&delayed_node->mutex);
1194
1195         path = btrfs_alloc_path();
1196         if (!path) {
1197                 btrfs_release_delayed_node(delayed_node);
1198                 return -ENOMEM;
1199         }
1200         path->leave_spinning = 1;
1201
1202         block_rsv = trans->block_rsv;
1203         trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1204
1205         ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1206
1207         btrfs_release_delayed_node(delayed_node);
1208         btrfs_free_path(path);
1209         trans->block_rsv = block_rsv;
1210
1211         return ret;
1212 }
1213
1214 int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
1215 {
1216         struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
1217         struct btrfs_trans_handle *trans;
1218         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1219         struct btrfs_path *path;
1220         struct btrfs_block_rsv *block_rsv;
1221         int ret;
1222
1223         if (!delayed_node)
1224                 return 0;
1225
1226         mutex_lock(&delayed_node->mutex);
1227         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1228                 mutex_unlock(&delayed_node->mutex);
1229                 btrfs_release_delayed_node(delayed_node);
1230                 return 0;
1231         }
1232         mutex_unlock(&delayed_node->mutex);
1233
1234         trans = btrfs_join_transaction(delayed_node->root);
1235         if (IS_ERR(trans)) {
1236                 ret = PTR_ERR(trans);
1237                 goto out;
1238         }
1239
1240         path = btrfs_alloc_path();
1241         if (!path) {
1242                 ret = -ENOMEM;
1243                 goto trans_out;
1244         }
1245         path->leave_spinning = 1;
1246
1247         block_rsv = trans->block_rsv;
1248         trans->block_rsv = &fs_info->delayed_block_rsv;
1249
1250         mutex_lock(&delayed_node->mutex);
1251         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1252                 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1253                                                    path, delayed_node);
1254         else
1255                 ret = 0;
1256         mutex_unlock(&delayed_node->mutex);
1257
1258         btrfs_free_path(path);
1259         trans->block_rsv = block_rsv;
1260 trans_out:
1261         btrfs_end_transaction(trans);
1262         btrfs_btree_balance_dirty(fs_info);
1263 out:
1264         btrfs_release_delayed_node(delayed_node);
1265
1266         return ret;
1267 }
1268
1269 void btrfs_remove_delayed_node(struct btrfs_inode *inode)
1270 {
1271         struct btrfs_delayed_node *delayed_node;
1272
1273         delayed_node = READ_ONCE(inode->delayed_node);
1274         if (!delayed_node)
1275                 return;
1276
1277         inode->delayed_node = NULL;
1278         btrfs_release_delayed_node(delayed_node);
1279 }
1280
1281 struct btrfs_async_delayed_work {
1282         struct btrfs_delayed_root *delayed_root;
1283         int nr;
1284         struct btrfs_work work;
1285 };
1286
1287 static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1288 {
1289         struct btrfs_async_delayed_work *async_work;
1290         struct btrfs_delayed_root *delayed_root;
1291         struct btrfs_trans_handle *trans;
1292         struct btrfs_path *path;
1293         struct btrfs_delayed_node *delayed_node = NULL;
1294         struct btrfs_root *root;
1295         struct btrfs_block_rsv *block_rsv;
1296         int total_done = 0;
1297
1298         async_work = container_of(work, struct btrfs_async_delayed_work, work);
1299         delayed_root = async_work->delayed_root;
1300
1301         path = btrfs_alloc_path();
1302         if (!path)
1303                 goto out;
1304
1305 again:
1306         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
1307                 goto free_path;
1308
1309         delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1310         if (!delayed_node)
1311                 goto free_path;
1312
1313         path->leave_spinning = 1;
1314         root = delayed_node->root;
1315
1316         trans = btrfs_join_transaction(root);
1317         if (IS_ERR(trans))
1318                 goto release_path;
1319
1320         block_rsv = trans->block_rsv;
1321         trans->block_rsv = &root->fs_info->delayed_block_rsv;
1322
1323         __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1324
1325         trans->block_rsv = block_rsv;
1326         btrfs_end_transaction(trans);
1327         btrfs_btree_balance_dirty_nodelay(root->fs_info);
1328
1329 release_path:
1330         btrfs_release_path(path);
1331         total_done++;
1332
1333         btrfs_release_prepared_delayed_node(delayed_node);
1334         if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
1335             total_done < async_work->nr)
1336                 goto again;
1337
1338 free_path:
1339         btrfs_free_path(path);
1340 out:
1341         wake_up(&delayed_root->wait);
1342         kfree(async_work);
1343 }
1344
1345
1346 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1347                                      struct btrfs_fs_info *fs_info, int nr)
1348 {
1349         struct btrfs_async_delayed_work *async_work;
1350
1351         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
1352             btrfs_workqueue_normal_congested(fs_info->delayed_workers))
1353                 return 0;
1354
1355         async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1356         if (!async_work)
1357                 return -ENOMEM;
1358
1359         async_work->delayed_root = delayed_root;
1360         btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1361                         btrfs_async_run_delayed_root, NULL, NULL);
1362         async_work->nr = nr;
1363
1364         btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1365         return 0;
1366 }
1367
1368 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
1369 {
1370         WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
1371 }
1372
1373 static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1374 {
1375         int val = atomic_read(&delayed_root->items_seq);
1376
1377         if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1378                 return 1;
1379
1380         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1381                 return 1;
1382
1383         return 0;
1384 }
1385
1386 void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
1387 {
1388         struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
1389
1390         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1391                 return;
1392
1393         if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1394                 int seq;
1395                 int ret;
1396
1397                 seq = atomic_read(&delayed_root->items_seq);
1398
1399                 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1400                 if (ret)
1401                         return;
1402
1403                 wait_event_interruptible(delayed_root->wait,
1404                                          could_end_wait(delayed_root, seq));
1405                 return;
1406         }
1407
1408         btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1409 }
1410
1411 /* Will return 0 or -ENOMEM */
1412 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1413                                    struct btrfs_fs_info *fs_info,
1414                                    const char *name, int name_len,
1415                                    struct btrfs_inode *dir,
1416                                    struct btrfs_disk_key *disk_key, u8 type,
1417                                    u64 index)
1418 {
1419         struct btrfs_delayed_node *delayed_node;
1420         struct btrfs_delayed_item *delayed_item;
1421         struct btrfs_dir_item *dir_item;
1422         int ret;
1423
1424         delayed_node = btrfs_get_or_create_delayed_node(dir);
1425         if (IS_ERR(delayed_node))
1426                 return PTR_ERR(delayed_node);
1427
1428         delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1429         if (!delayed_item) {
1430                 ret = -ENOMEM;
1431                 goto release_node;
1432         }
1433
1434         delayed_item->key.objectid = btrfs_ino(dir);
1435         delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
1436         delayed_item->key.offset = index;
1437
1438         dir_item = (struct btrfs_dir_item *)delayed_item->data;
1439         dir_item->location = *disk_key;
1440         btrfs_set_stack_dir_transid(dir_item, trans->transid);
1441         btrfs_set_stack_dir_data_len(dir_item, 0);
1442         btrfs_set_stack_dir_name_len(dir_item, name_len);
1443         btrfs_set_stack_dir_type(dir_item, type);
1444         memcpy((char *)(dir_item + 1), name, name_len);
1445
1446         ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, delayed_item);
1447         /*
1448          * we have reserved enough space when we start a new transaction,
1449          * so reserving metadata failure is impossible
1450          */
1451         BUG_ON(ret);
1452
1453
1454         mutex_lock(&delayed_node->mutex);
1455         ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1456         if (unlikely(ret)) {
1457                 btrfs_err(fs_info,
1458                           "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1459                           name_len, name, delayed_node->root->objectid,
1460                           delayed_node->inode_id, ret);
1461                 BUG();
1462         }
1463         mutex_unlock(&delayed_node->mutex);
1464
1465 release_node:
1466         btrfs_release_delayed_node(delayed_node);
1467         return ret;
1468 }
1469
1470 static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
1471                                                struct btrfs_delayed_node *node,
1472                                                struct btrfs_key *key)
1473 {
1474         struct btrfs_delayed_item *item;
1475
1476         mutex_lock(&node->mutex);
1477         item = __btrfs_lookup_delayed_insertion_item(node, key);
1478         if (!item) {
1479                 mutex_unlock(&node->mutex);
1480                 return 1;
1481         }
1482
1483         btrfs_delayed_item_release_metadata(fs_info, item);
1484         btrfs_release_delayed_item(item);
1485         mutex_unlock(&node->mutex);
1486         return 0;
1487 }
1488
1489 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1490                                    struct btrfs_fs_info *fs_info,
1491                                    struct btrfs_inode *dir, u64 index)
1492 {
1493         struct btrfs_delayed_node *node;
1494         struct btrfs_delayed_item *item;
1495         struct btrfs_key item_key;
1496         int ret;
1497
1498         node = btrfs_get_or_create_delayed_node(dir);
1499         if (IS_ERR(node))
1500                 return PTR_ERR(node);
1501
1502         item_key.objectid = btrfs_ino(dir);
1503         item_key.type = BTRFS_DIR_INDEX_KEY;
1504         item_key.offset = index;
1505
1506         ret = btrfs_delete_delayed_insertion_item(fs_info, node, &item_key);
1507         if (!ret)
1508                 goto end;
1509
1510         item = btrfs_alloc_delayed_item(0);
1511         if (!item) {
1512                 ret = -ENOMEM;
1513                 goto end;
1514         }
1515
1516         item->key = item_key;
1517
1518         ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, item);
1519         /*
1520          * we have reserved enough space when we start a new transaction,
1521          * so reserving metadata failure is impossible.
1522          */
1523         BUG_ON(ret);
1524
1525         mutex_lock(&node->mutex);
1526         ret = __btrfs_add_delayed_deletion_item(node, item);
1527         if (unlikely(ret)) {
1528                 btrfs_err(fs_info,
1529                           "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1530                           index, node->root->objectid, node->inode_id, ret);
1531                 BUG();
1532         }
1533         mutex_unlock(&node->mutex);
1534 end:
1535         btrfs_release_delayed_node(node);
1536         return ret;
1537 }
1538
1539 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
1540 {
1541         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1542
1543         if (!delayed_node)
1544                 return -ENOENT;
1545
1546         /*
1547          * Since we have held i_mutex of this directory, it is impossible that
1548          * a new directory index is added into the delayed node and index_cnt
1549          * is updated now. So we needn't lock the delayed node.
1550          */
1551         if (!delayed_node->index_cnt) {
1552                 btrfs_release_delayed_node(delayed_node);
1553                 return -EINVAL;
1554         }
1555
1556         inode->index_cnt = delayed_node->index_cnt;
1557         btrfs_release_delayed_node(delayed_node);
1558         return 0;
1559 }
1560
1561 bool btrfs_readdir_get_delayed_items(struct inode *inode,
1562                                      struct list_head *ins_list,
1563                                      struct list_head *del_list)
1564 {
1565         struct btrfs_delayed_node *delayed_node;
1566         struct btrfs_delayed_item *item;
1567
1568         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1569         if (!delayed_node)
1570                 return false;
1571
1572         /*
1573          * We can only do one readdir with delayed items at a time because of
1574          * item->readdir_list.
1575          */
1576         inode_unlock_shared(inode);
1577         inode_lock(inode);
1578
1579         mutex_lock(&delayed_node->mutex);
1580         item = __btrfs_first_delayed_insertion_item(delayed_node);
1581         while (item) {
1582                 refcount_inc(&item->refs);
1583                 list_add_tail(&item->readdir_list, ins_list);
1584                 item = __btrfs_next_delayed_item(item);
1585         }
1586
1587         item = __btrfs_first_delayed_deletion_item(delayed_node);
1588         while (item) {
1589                 refcount_inc(&item->refs);
1590                 list_add_tail(&item->readdir_list, del_list);
1591                 item = __btrfs_next_delayed_item(item);
1592         }
1593         mutex_unlock(&delayed_node->mutex);
1594         /*
1595          * This delayed node is still cached in the btrfs inode, so refs
1596          * must be > 1 now, and we needn't check it is going to be freed
1597          * or not.
1598          *
1599          * Besides that, this function is used to read dir, we do not
1600          * insert/delete delayed items in this period. So we also needn't
1601          * requeue or dequeue this delayed node.
1602          */
1603         refcount_dec(&delayed_node->refs);
1604
1605         return true;
1606 }
1607
1608 void btrfs_readdir_put_delayed_items(struct inode *inode,
1609                                      struct list_head *ins_list,
1610                                      struct list_head *del_list)
1611 {
1612         struct btrfs_delayed_item *curr, *next;
1613
1614         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1615                 list_del(&curr->readdir_list);
1616                 if (refcount_dec_and_test(&curr->refs))
1617                         kfree(curr);
1618         }
1619
1620         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1621                 list_del(&curr->readdir_list);
1622                 if (refcount_dec_and_test(&curr->refs))
1623                         kfree(curr);
1624         }
1625
1626         /*
1627          * The VFS is going to do up_read(), so we need to downgrade back to a
1628          * read lock.
1629          */
1630         downgrade_write(&inode->i_rwsem);
1631 }
1632
1633 int btrfs_should_delete_dir_index(struct list_head *del_list,
1634                                   u64 index)
1635 {
1636         struct btrfs_delayed_item *curr, *next;
1637         int ret;
1638
1639         if (list_empty(del_list))
1640                 return 0;
1641
1642         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1643                 if (curr->key.offset > index)
1644                         break;
1645
1646                 list_del(&curr->readdir_list);
1647                 ret = (curr->key.offset == index);
1648
1649                 if (refcount_dec_and_test(&curr->refs))
1650                         kfree(curr);
1651
1652                 if (ret)
1653                         return 1;
1654                 else
1655                         continue;
1656         }
1657         return 0;
1658 }
1659
1660 /*
1661  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1662  *
1663  */
1664 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1665                                     struct list_head *ins_list)
1666 {
1667         struct btrfs_dir_item *di;
1668         struct btrfs_delayed_item *curr, *next;
1669         struct btrfs_key location;
1670         char *name;
1671         int name_len;
1672         int over = 0;
1673         unsigned char d_type;
1674
1675         if (list_empty(ins_list))
1676                 return 0;
1677
1678         /*
1679          * Changing the data of the delayed item is impossible. So
1680          * we needn't lock them. And we have held i_mutex of the
1681          * directory, nobody can delete any directory indexes now.
1682          */
1683         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1684                 list_del(&curr->readdir_list);
1685
1686                 if (curr->key.offset < ctx->pos) {
1687                         if (refcount_dec_and_test(&curr->refs))
1688                                 kfree(curr);
1689                         continue;
1690                 }
1691
1692                 ctx->pos = curr->key.offset;
1693
1694                 di = (struct btrfs_dir_item *)curr->data;
1695                 name = (char *)(di + 1);
1696                 name_len = btrfs_stack_dir_name_len(di);
1697
1698                 d_type = btrfs_filetype_table[di->type];
1699                 btrfs_disk_key_to_cpu(&location, &di->location);
1700
1701                 over = !dir_emit(ctx, name, name_len,
1702                                location.objectid, d_type);
1703
1704                 if (refcount_dec_and_test(&curr->refs))
1705                         kfree(curr);
1706
1707                 if (over)
1708                         return 1;
1709                 ctx->pos++;
1710         }
1711         return 0;
1712 }
1713
1714 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1715                                   struct btrfs_inode_item *inode_item,
1716                                   struct inode *inode)
1717 {
1718         btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1719         btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1720         btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1721         btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1722         btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1723         btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1724         btrfs_set_stack_inode_generation(inode_item,
1725                                          BTRFS_I(inode)->generation);
1726         btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
1727         btrfs_set_stack_inode_transid(inode_item, trans->transid);
1728         btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1729         btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1730         btrfs_set_stack_inode_block_group(inode_item, 0);
1731
1732         btrfs_set_stack_timespec_sec(&inode_item->atime,
1733                                      inode->i_atime.tv_sec);
1734         btrfs_set_stack_timespec_nsec(&inode_item->atime,
1735                                       inode->i_atime.tv_nsec);
1736
1737         btrfs_set_stack_timespec_sec(&inode_item->mtime,
1738                                      inode->i_mtime.tv_sec);
1739         btrfs_set_stack_timespec_nsec(&inode_item->mtime,
1740                                       inode->i_mtime.tv_nsec);
1741
1742         btrfs_set_stack_timespec_sec(&inode_item->ctime,
1743                                      inode->i_ctime.tv_sec);
1744         btrfs_set_stack_timespec_nsec(&inode_item->ctime,
1745                                       inode->i_ctime.tv_nsec);
1746
1747         btrfs_set_stack_timespec_sec(&inode_item->otime,
1748                                      BTRFS_I(inode)->i_otime.tv_sec);
1749         btrfs_set_stack_timespec_nsec(&inode_item->otime,
1750                                      BTRFS_I(inode)->i_otime.tv_nsec);
1751 }
1752
1753 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1754 {
1755         struct btrfs_delayed_node *delayed_node;
1756         struct btrfs_inode_item *inode_item;
1757
1758         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1759         if (!delayed_node)
1760                 return -ENOENT;
1761
1762         mutex_lock(&delayed_node->mutex);
1763         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1764                 mutex_unlock(&delayed_node->mutex);
1765                 btrfs_release_delayed_node(delayed_node);
1766                 return -ENOENT;
1767         }
1768
1769         inode_item = &delayed_node->inode_item;
1770
1771         i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1772         i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1773         btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
1774         inode->i_mode = btrfs_stack_inode_mode(inode_item);
1775         set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1776         inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1777         BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1778         BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1779
1780         inode->i_version = btrfs_stack_inode_sequence(inode_item);
1781         inode->i_rdev = 0;
1782         *rdev = btrfs_stack_inode_rdev(inode_item);
1783         BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1784
1785         inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
1786         inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
1787
1788         inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
1789         inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
1790
1791         inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
1792         inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
1793
1794         BTRFS_I(inode)->i_otime.tv_sec =
1795                 btrfs_stack_timespec_sec(&inode_item->otime);
1796         BTRFS_I(inode)->i_otime.tv_nsec =
1797                 btrfs_stack_timespec_nsec(&inode_item->otime);
1798
1799         inode->i_generation = BTRFS_I(inode)->generation;
1800         BTRFS_I(inode)->index_cnt = (u64)-1;
1801
1802         mutex_unlock(&delayed_node->mutex);
1803         btrfs_release_delayed_node(delayed_node);
1804         return 0;
1805 }
1806
1807 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1808                                struct btrfs_root *root, struct inode *inode)
1809 {
1810         struct btrfs_delayed_node *delayed_node;
1811         int ret = 0;
1812
1813         delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
1814         if (IS_ERR(delayed_node))
1815                 return PTR_ERR(delayed_node);
1816
1817         mutex_lock(&delayed_node->mutex);
1818         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1819                 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1820                 goto release_node;
1821         }
1822
1823         ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
1824                                                    delayed_node);
1825         if (ret)
1826                 goto release_node;
1827
1828         fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1829         set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1830         delayed_node->count++;
1831         atomic_inc(&root->fs_info->delayed_root->items);
1832 release_node:
1833         mutex_unlock(&delayed_node->mutex);
1834         btrfs_release_delayed_node(delayed_node);
1835         return ret;
1836 }
1837
1838 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
1839 {
1840         struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
1841         struct btrfs_delayed_node *delayed_node;
1842
1843         /*
1844          * we don't do delayed inode updates during log recovery because it
1845          * leads to enospc problems.  This means we also can't do
1846          * delayed inode refs
1847          */
1848         if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
1849                 return -EAGAIN;
1850
1851         delayed_node = btrfs_get_or_create_delayed_node(inode);
1852         if (IS_ERR(delayed_node))
1853                 return PTR_ERR(delayed_node);
1854
1855         /*
1856          * We don't reserve space for inode ref deletion is because:
1857          * - We ONLY do async inode ref deletion for the inode who has only
1858          *   one link(i_nlink == 1), it means there is only one inode ref.
1859          *   And in most case, the inode ref and the inode item are in the
1860          *   same leaf, and we will deal with them at the same time.
1861          *   Since we are sure we will reserve the space for the inode item,
1862          *   it is unnecessary to reserve space for inode ref deletion.
1863          * - If the inode ref and the inode item are not in the same leaf,
1864          *   We also needn't worry about enospc problem, because we reserve
1865          *   much more space for the inode update than it needs.
1866          * - At the worst, we can steal some space from the global reservation.
1867          *   It is very rare.
1868          */
1869         mutex_lock(&delayed_node->mutex);
1870         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1871                 goto release_node;
1872
1873         set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1874         delayed_node->count++;
1875         atomic_inc(&fs_info->delayed_root->items);
1876 release_node:
1877         mutex_unlock(&delayed_node->mutex);
1878         btrfs_release_delayed_node(delayed_node);
1879         return 0;
1880 }
1881
1882 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1883 {
1884         struct btrfs_root *root = delayed_node->root;
1885         struct btrfs_fs_info *fs_info = root->fs_info;
1886         struct btrfs_delayed_item *curr_item, *prev_item;
1887
1888         mutex_lock(&delayed_node->mutex);
1889         curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1890         while (curr_item) {
1891                 btrfs_delayed_item_release_metadata(fs_info, curr_item);
1892                 prev_item = curr_item;
1893                 curr_item = __btrfs_next_delayed_item(prev_item);
1894                 btrfs_release_delayed_item(prev_item);
1895         }
1896
1897         curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1898         while (curr_item) {
1899                 btrfs_delayed_item_release_metadata(fs_info, curr_item);
1900                 prev_item = curr_item;
1901                 curr_item = __btrfs_next_delayed_item(prev_item);
1902                 btrfs_release_delayed_item(prev_item);
1903         }
1904
1905         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1906                 btrfs_release_delayed_iref(delayed_node);
1907
1908         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1909                 btrfs_delayed_inode_release_metadata(fs_info, delayed_node);
1910                 btrfs_release_delayed_inode(delayed_node);
1911         }
1912         mutex_unlock(&delayed_node->mutex);
1913 }
1914
1915 void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
1916 {
1917         struct btrfs_delayed_node *delayed_node;
1918
1919         delayed_node = btrfs_get_delayed_node(inode);
1920         if (!delayed_node)
1921                 return;
1922
1923         __btrfs_kill_delayed_node(delayed_node);
1924         btrfs_release_delayed_node(delayed_node);
1925 }
1926
1927 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1928 {
1929         u64 inode_id = 0;
1930         struct btrfs_delayed_node *delayed_nodes[8];
1931         int i, n;
1932
1933         while (1) {
1934                 spin_lock(&root->inode_lock);
1935                 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1936                                            (void **)delayed_nodes, inode_id,
1937                                            ARRAY_SIZE(delayed_nodes));
1938                 if (!n) {
1939                         spin_unlock(&root->inode_lock);
1940                         break;
1941                 }
1942
1943                 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1944
1945                 for (i = 0; i < n; i++)
1946                         refcount_inc(&delayed_nodes[i]->refs);
1947                 spin_unlock(&root->inode_lock);
1948
1949                 for (i = 0; i < n; i++) {
1950                         __btrfs_kill_delayed_node(delayed_nodes[i]);
1951                         btrfs_release_delayed_node(delayed_nodes[i]);
1952                 }
1953         }
1954 }
1955
1956 void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
1957 {
1958         struct btrfs_delayed_node *curr_node, *prev_node;
1959
1960         curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
1961         while (curr_node) {
1962                 __btrfs_kill_delayed_node(curr_node);
1963
1964                 prev_node = curr_node;
1965                 curr_node = btrfs_next_delayed_node(curr_node);
1966                 btrfs_release_delayed_node(prev_node);
1967         }
1968 }
1969