fs/btrfs/delayed-inode.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2011 Fujitsu.  All rights reserved.
4  * Written by Miao Xie <miaox@cn.fujitsu.com>
5  */
6
7 #include <linux/slab.h>
8 #include <linux/iversion.h>
9 #include "delayed-inode.h"
10 #include "disk-io.h"
11 #include "transaction.h"
12 #include "ctree.h"
13 #include "qgroup.h"
14
15 #define BTRFS_DELAYED_WRITEBACK         512
16 #define BTRFS_DELAYED_BACKGROUND        128
17 #define BTRFS_DELAYED_BATCH             16
18
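/*
 * Rough guide to the thresholds above, as they are used by
 * btrfs_balance_delayed_items() and finish_one_item() below: once more than
 * BTRFS_DELAYED_BACKGROUND delayed items are pending, background flushing is
 * kicked off; once more than BTRFS_DELAYED_WRITEBACK are pending, the caller
 * also waits for the count to drop; BTRFS_DELAYED_BATCH bounds how many
 * completions are needed before waiters are woken again.
 */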
19 static struct kmem_cache *delayed_node_cache;
20
21 int __init btrfs_delayed_inode_init(void)
22 {
23         delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
24                                         sizeof(struct btrfs_delayed_node),
25                                         0,
26                                         SLAB_MEM_SPREAD,
27                                         NULL);
28         if (!delayed_node_cache)
29                 return -ENOMEM;
30         return 0;
31 }
32
33 void __cold btrfs_delayed_inode_exit(void)
34 {
35         kmem_cache_destroy(delayed_node_cache);
36 }
37
38 static inline void btrfs_init_delayed_node(
39                                 struct btrfs_delayed_node *delayed_node,
40                                 struct btrfs_root *root, u64 inode_id)
41 {
42         delayed_node->root = root;
43         delayed_node->inode_id = inode_id;
44         refcount_set(&delayed_node->refs, 0);
45         delayed_node->ins_root = RB_ROOT_CACHED;
46         delayed_node->del_root = RB_ROOT_CACHED;
47         mutex_init(&delayed_node->mutex);
48         INIT_LIST_HEAD(&delayed_node->n_list);
49         INIT_LIST_HEAD(&delayed_node->p_list);
50 }
51
52 static inline int btrfs_is_continuous_delayed_item(
53                                         struct btrfs_delayed_item *item1,
54                                         struct btrfs_delayed_item *item2)
55 {
56         if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
57             item1->key.objectid == item2->key.objectid &&
58             item1->key.type == item2->key.type &&
59             item1->key.offset + 1 == item2->key.offset)
60                 return 1;
61         return 0;
62 }
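/*
 * Illustrative example (hypothetical keys): for a directory with objectid 257,
 * the delayed dir index items (257 DIR_INDEX 5) and (257 DIR_INDEX 6) are
 * continuous and can be batched into one leaf operation, while
 * (257 DIR_INDEX 5) and (257 DIR_INDEX 7) are not.
 */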
63
64 static struct btrfs_delayed_node *btrfs_get_delayed_node(
65                 struct btrfs_inode *btrfs_inode)
66 {
67         struct btrfs_root *root = btrfs_inode->root;
68         u64 ino = btrfs_ino(btrfs_inode);
69         struct btrfs_delayed_node *node;
70
71         node = READ_ONCE(btrfs_inode->delayed_node);
72         if (node) {
73                 refcount_inc(&node->refs);
74                 return node;
75         }
76
77         spin_lock(&root->inode_lock);
78         node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
79
80         if (node) {
81                 if (btrfs_inode->delayed_node) {
82                         refcount_inc(&node->refs);      /* can be accessed */
83                         BUG_ON(btrfs_inode->delayed_node != node);
84                         spin_unlock(&root->inode_lock);
85                         return node;
86                 }
87
88                 /*
89                  * It's possible that we're racing into the middle of removing
90                  * this node from the radix tree.  In this case, the refcount
91                  * was zero and it should never go back to one.  Just return
92                  * NULL like it was never in the radix at all; our release
93                  * function is in the process of removing it.
94                  *
95                  * Some implementations of refcount_inc refuse to bump the
96                  * refcount once it has hit zero.  If we don't do this dance
97                  * here, refcount_inc() may decide to just WARN_ONCE() instead
98                  * of actually bumping the refcount.
99                  *
100                  * If this node is properly in the radix, we want to bump the
101                  * refcount twice, once for the inode and once for this get
102                  * operation.
103                  */
104                 if (refcount_inc_not_zero(&node->refs)) {
105                         refcount_inc(&node->refs);
106                         btrfs_inode->delayed_node = node;
107                 } else {
108                         node = NULL;
109                 }
110
111                 spin_unlock(&root->inode_lock);
112                 return node;
113         }
114         spin_unlock(&root->inode_lock);
115
116         return NULL;
117 }
118
119 /* Will return either the node or PTR_ERR(-ENOMEM) */
120 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
121                 struct btrfs_inode *btrfs_inode)
122 {
123         struct btrfs_delayed_node *node;
124         struct btrfs_root *root = btrfs_inode->root;
125         u64 ino = btrfs_ino(btrfs_inode);
126         int ret;
127
128 again:
129         node = btrfs_get_delayed_node(btrfs_inode);
130         if (node)
131                 return node;
132
133         node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
134         if (!node)
135                 return ERR_PTR(-ENOMEM);
136         btrfs_init_delayed_node(node, root, ino);
137
138         /* cached in the btrfs inode and can be accessed */
139         refcount_set(&node->refs, 2);
140
141         ret = radix_tree_preload(GFP_NOFS);
142         if (ret) {
143                 kmem_cache_free(delayed_node_cache, node);
144                 return ERR_PTR(ret);
145         }
146
147         spin_lock(&root->inode_lock);
148         ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
149         if (ret == -EEXIST) {
150                 spin_unlock(&root->inode_lock);
151                 kmem_cache_free(delayed_node_cache, node);
152                 radix_tree_preload_end();
153                 goto again;
154         }
155         btrfs_inode->delayed_node = node;
156         spin_unlock(&root->inode_lock);
157         radix_tree_preload_end();
158
159         return node;
160 }
161
162 /*
163  * Call it when holding delayed_node->mutex
164  *
165  * If mod = 1, add this node into the prepared list.
166  */
167 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
168                                      struct btrfs_delayed_node *node,
169                                      int mod)
170 {
171         spin_lock(&root->lock);
172         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
173                 if (!list_empty(&node->p_list))
174                         list_move_tail(&node->p_list, &root->prepare_list);
175                 else if (mod)
176                         list_add_tail(&node->p_list, &root->prepare_list);
177         } else {
178                 list_add_tail(&node->n_list, &root->node_list);
179                 list_add_tail(&node->p_list, &root->prepare_list);
180                 refcount_inc(&node->refs);      /* inserted into list */
181                 root->nodes++;
182                 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
183         }
184         spin_unlock(&root->lock);
185 }
186
187 /* Call it when holding delayed_node->mutex */
188 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
189                                        struct btrfs_delayed_node *node)
190 {
191         spin_lock(&root->lock);
192         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
193                 root->nodes--;
194                 refcount_dec(&node->refs);      /* not in the list */
195                 list_del_init(&node->n_list);
196                 if (!list_empty(&node->p_list))
197                         list_del_init(&node->p_list);
198                 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
199         }
200         spin_unlock(&root->lock);
201 }
202
203 static struct btrfs_delayed_node *btrfs_first_delayed_node(
204                         struct btrfs_delayed_root *delayed_root)
205 {
206         struct list_head *p;
207         struct btrfs_delayed_node *node = NULL;
208
209         spin_lock(&delayed_root->lock);
210         if (list_empty(&delayed_root->node_list))
211                 goto out;
212
213         p = delayed_root->node_list.next;
214         node = list_entry(p, struct btrfs_delayed_node, n_list);
215         refcount_inc(&node->refs);
216 out:
217         spin_unlock(&delayed_root->lock);
218
219         return node;
220 }
221
222 static struct btrfs_delayed_node *btrfs_next_delayed_node(
223                                                 struct btrfs_delayed_node *node)
224 {
225         struct btrfs_delayed_root *delayed_root;
226         struct list_head *p;
227         struct btrfs_delayed_node *next = NULL;
228
229         delayed_root = node->root->fs_info->delayed_root;
230         spin_lock(&delayed_root->lock);
231         if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
232                 /* not in the list */
233                 if (list_empty(&delayed_root->node_list))
234                         goto out;
235                 p = delayed_root->node_list.next;
236         } else if (list_is_last(&node->n_list, &delayed_root->node_list))
237                 goto out;
238         else
239                 p = node->n_list.next;
240
241         next = list_entry(p, struct btrfs_delayed_node, n_list);
242         refcount_inc(&next->refs);
243 out:
244         spin_unlock(&delayed_root->lock);
245
246         return next;
247 }
248
249 static void __btrfs_release_delayed_node(
250                                 struct btrfs_delayed_node *delayed_node,
251                                 int mod)
252 {
253         struct btrfs_delayed_root *delayed_root;
254
255         if (!delayed_node)
256                 return;
257
258         delayed_root = delayed_node->root->fs_info->delayed_root;
259
260         mutex_lock(&delayed_node->mutex);
261         if (delayed_node->count)
262                 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
263         else
264                 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
265         mutex_unlock(&delayed_node->mutex);
266
267         if (refcount_dec_and_test(&delayed_node->refs)) {
268                 struct btrfs_root *root = delayed_node->root;
269
270                 spin_lock(&root->inode_lock);
271                 /*
272                  * Once our refcount goes to zero, nobody is allowed to bump it
273                  * back up.  We can delete it now.
274                  */
275                 ASSERT(refcount_read(&delayed_node->refs) == 0);
276                 radix_tree_delete(&root->delayed_nodes_tree,
277                                   delayed_node->inode_id);
278                 spin_unlock(&root->inode_lock);
279                 kmem_cache_free(delayed_node_cache, delayed_node);
280         }
281 }
282
283 static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
284 {
285         __btrfs_release_delayed_node(node, 0);
286 }
287
288 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
289                                         struct btrfs_delayed_root *delayed_root)
290 {
291         struct list_head *p;
292         struct btrfs_delayed_node *node = NULL;
293
294         spin_lock(&delayed_root->lock);
295         if (list_empty(&delayed_root->prepare_list))
296                 goto out;
297
298         p = delayed_root->prepare_list.next;
299         list_del_init(p);
300         node = list_entry(p, struct btrfs_delayed_node, p_list);
301         refcount_inc(&node->refs);
302 out:
303         spin_unlock(&delayed_root->lock);
304
305         return node;
306 }
307
308 static inline void btrfs_release_prepared_delayed_node(
309                                         struct btrfs_delayed_node *node)
310 {
311         __btrfs_release_delayed_node(node, 1);
312 }
313
314 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
315 {
316         struct btrfs_delayed_item *item;
317         item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
318         if (item) {
319                 item->data_len = data_len;
320                 item->ins_or_del = 0;
321                 item->bytes_reserved = 0;
322                 item->delayed_node = NULL;
323                 refcount_set(&item->refs, 1);
324         }
325         return item;
326 }
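/*
 * The item's payload lives in the @data_len bytes allocated directly after
 * struct btrfs_delayed_item and is reached through item->data; for example,
 * btrfs_insert_delayed_dir_index() below stores a struct btrfs_dir_item
 * followed by the name there.
 */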
327
328 /*
329  * __btrfs_lookup_delayed_item - look up the delayed item by key
330  * @root:         the rb-tree root (insertion or deletion tree) to search
331  * @key:          the key to look up
332  * @prev:         used to store the prev item if the right item isn't found
333  * @next:         used to store the next item if the right item isn't found
334  *
335  * Note: if the right item isn't found, NULL is returned and the prev and
336  * next items are stored in @prev and @next.
337  */
338 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
339                                 struct rb_root *root,
340                                 struct btrfs_key *key,
341                                 struct btrfs_delayed_item **prev,
342                                 struct btrfs_delayed_item **next)
343 {
344         struct rb_node *node, *prev_node = NULL;
345         struct btrfs_delayed_item *delayed_item = NULL;
346         int ret = 0;
347
348         node = root->rb_node;
349
350         while (node) {
351                 delayed_item = rb_entry(node, struct btrfs_delayed_item,
352                                         rb_node);
353                 prev_node = node;
354                 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
355                 if (ret < 0)
356                         node = node->rb_right;
357                 else if (ret > 0)
358                         node = node->rb_left;
359                 else
360                         return delayed_item;
361         }
362
363         if (prev) {
364                 if (!prev_node)
365                         *prev = NULL;
366                 else if (ret < 0)
367                         *prev = delayed_item;
368                 else if ((node = rb_prev(prev_node)) != NULL) {
369                         *prev = rb_entry(node, struct btrfs_delayed_item,
370                                          rb_node);
371                 } else
372                         *prev = NULL;
373         }
374
375         if (next) {
376                 if (!prev_node)
377                         *next = NULL;
378                 else if (ret > 0)
379                         *next = delayed_item;
380                 else if ((node = rb_next(prev_node)) != NULL) {
381                         *next = rb_entry(node, struct btrfs_delayed_item,
382                                          rb_node);
383                 } else
384                         *next = NULL;
385         }
386         return NULL;
387 }
388
389 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
390                                         struct btrfs_delayed_node *delayed_node,
391                                         struct btrfs_key *key)
392 {
393         return __btrfs_lookup_delayed_item(&delayed_node->ins_root.rb_root, key,
394                                            NULL, NULL);
395 }
396
397 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
398                                     struct btrfs_delayed_item *ins,
399                                     int action)
400 {
401         struct rb_node **p, *node;
402         struct rb_node *parent_node = NULL;
403         struct rb_root_cached *root;
404         struct btrfs_delayed_item *item;
405         int cmp;
406         bool leftmost = true;
407
408         if (action == BTRFS_DELAYED_INSERTION_ITEM)
409                 root = &delayed_node->ins_root;
410         else if (action == BTRFS_DELAYED_DELETION_ITEM)
411                 root = &delayed_node->del_root;
412         else
413                 BUG();
414         p = &root->rb_root.rb_node;
415         node = &ins->rb_node;
416
417         while (*p) {
418                 parent_node = *p;
419                 item = rb_entry(parent_node, struct btrfs_delayed_item,
420                                  rb_node);
421
422                 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
423                 if (cmp < 0) {
424                         p = &(*p)->rb_right;
425                         leftmost = false;
426                 } else if (cmp > 0) {
427                         p = &(*p)->rb_left;
428                 } else {
429                         return -EEXIST;
430                 }
431         }
432
433         rb_link_node(node, parent_node, p);
434         rb_insert_color_cached(node, root, leftmost);
435         ins->delayed_node = delayed_node;
436         ins->ins_or_del = action;
437
438         if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
439             action == BTRFS_DELAYED_INSERTION_ITEM &&
440             ins->key.offset >= delayed_node->index_cnt)
441                         delayed_node->index_cnt = ins->key.offset + 1;
442
443         delayed_node->count++;
444         atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
445         return 0;
446 }
447
448 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
449                                               struct btrfs_delayed_item *item)
450 {
451         return __btrfs_add_delayed_item(node, item,
452                                         BTRFS_DELAYED_INSERTION_ITEM);
453 }
454
455 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
456                                              struct btrfs_delayed_item *item)
457 {
458         return __btrfs_add_delayed_item(node, item,
459                                         BTRFS_DELAYED_DELETION_ITEM);
460 }
461
462 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
463 {
464         int seq = atomic_inc_return(&delayed_root->items_seq);
465
466         /* atomic_dec_return implies a barrier */
467         if ((atomic_dec_return(&delayed_root->items) <
468             BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0))
469                 cond_wake_up_nomb(&delayed_root->wait);
470 }
471
472 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
473 {
474         struct rb_root_cached *root;
475         struct btrfs_delayed_root *delayed_root;
476
477         delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
478
479         BUG_ON(!delayed_root);
480         BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
481                delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
482
483         if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
484                 root = &delayed_item->delayed_node->ins_root;
485         else
486                 root = &delayed_item->delayed_node->del_root;
487
488         rb_erase_cached(&delayed_item->rb_node, root);
489         delayed_item->delayed_node->count--;
490
491         finish_one_item(delayed_root);
492 }
493
494 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
495 {
496         if (item) {
497                 __btrfs_remove_delayed_item(item);
498                 if (refcount_dec_and_test(&item->refs))
499                         kfree(item);
500         }
501 }
502
503 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
504                                         struct btrfs_delayed_node *delayed_node)
505 {
506         struct rb_node *p;
507         struct btrfs_delayed_item *item = NULL;
508
509         p = rb_first_cached(&delayed_node->ins_root);
510         if (p)
511                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
512
513         return item;
514 }
515
516 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
517                                         struct btrfs_delayed_node *delayed_node)
518 {
519         struct rb_node *p;
520         struct btrfs_delayed_item *item = NULL;
521
522         p = rb_first_cached(&delayed_node->del_root);
523         if (p)
524                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
525
526         return item;
527 }
528
529 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
530                                                 struct btrfs_delayed_item *item)
531 {
532         struct rb_node *p;
533         struct btrfs_delayed_item *next = NULL;
534
535         p = rb_next(&item->rb_node);
536         if (p)
537                 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
538
539         return next;
540 }
541
542 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
543                                                struct btrfs_root *root,
544                                                struct btrfs_delayed_item *item)
545 {
546         struct btrfs_block_rsv *src_rsv;
547         struct btrfs_block_rsv *dst_rsv;
548         struct btrfs_fs_info *fs_info = root->fs_info;
549         u64 num_bytes;
550         int ret;
551
552         if (!trans->bytes_reserved)
553                 return 0;
554
555         src_rsv = trans->block_rsv;
556         dst_rsv = &fs_info->delayed_block_rsv;
557
558         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
559
560         /*
561          * Here we migrate space rsv from the transaction rsv, since we have
562          * already reserved space when starting the transaction, so there is
563          * no need to reserve qgroup space here.
564          */
565         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
566         if (!ret) {
567                 trace_btrfs_space_reservation(fs_info, "delayed_item",
568                                               item->key.objectid,
569                                               num_bytes, 1);
570                 item->bytes_reserved = num_bytes;
571         }
572
573         return ret;
574 }
575
576 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
577                                                 struct btrfs_delayed_item *item)
578 {
579         struct btrfs_block_rsv *rsv;
580         struct btrfs_fs_info *fs_info = root->fs_info;
581
582         if (!item->bytes_reserved)
583                 return;
584
585         rsv = &fs_info->delayed_block_rsv;
586         /*
587          * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
588          * to release/reserve qgroup space.
589          */
590         trace_btrfs_space_reservation(fs_info, "delayed_item",
591                                       item->key.objectid, item->bytes_reserved,
592                                       0);
593         btrfs_block_rsv_release(fs_info, rsv,
594                                 item->bytes_reserved);
595 }
596
597 static int btrfs_delayed_inode_reserve_metadata(
598                                         struct btrfs_trans_handle *trans,
599                                         struct btrfs_root *root,
600                                         struct btrfs_inode *inode,
601                                         struct btrfs_delayed_node *node)
602 {
603         struct btrfs_fs_info *fs_info = root->fs_info;
604         struct btrfs_block_rsv *src_rsv;
605         struct btrfs_block_rsv *dst_rsv;
606         u64 num_bytes;
607         int ret;
608
609         src_rsv = trans->block_rsv;
610         dst_rsv = &fs_info->delayed_block_rsv;
611
612         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
613
614         /*
615          * btrfs_dirty_inode will update the inode under btrfs_join_transaction
616          * which doesn't reserve space for speed.  This is a problem since we
617          * still need to reserve space for this update, so try to reserve the
618          * space.
619          *
620          * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
621          * we always reserve enough to update the inode item.
622          */
623         if (!src_rsv || (!trans->bytes_reserved &&
624                          src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
625                 ret = btrfs_qgroup_reserve_meta_prealloc(root,
626                                 fs_info->nodesize, true);
627                 if (ret < 0)
628                         return ret;
629                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
630                                           BTRFS_RESERVE_NO_FLUSH);
631                 /*
632                  * Since we're under a transaction reserve_metadata_bytes could
633                  * try to commit the transaction which will make it return
634                  * EAGAIN to make us stop the transaction we have, so return
635                  * ENOSPC instead so that btrfs_dirty_inode knows what to do.
636                  */
637                 if (ret == -EAGAIN) {
638                         ret = -ENOSPC;
639                         btrfs_qgroup_free_meta_prealloc(root, num_bytes);
640                 }
641                 if (!ret) {
642                         node->bytes_reserved = num_bytes;
643                         trace_btrfs_space_reservation(fs_info,
644                                                       "delayed_inode",
645                                                       btrfs_ino(inode),
646                                                       num_bytes, 1);
647                 } else {
648                         btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
649                 }
650                 return ret;
651         }
652
653         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
654         if (!ret) {
655                 trace_btrfs_space_reservation(fs_info, "delayed_inode",
656                                               btrfs_ino(inode), num_bytes, 1);
657                 node->bytes_reserved = num_bytes;
658         }
659
660         return ret;
661 }
662
663 static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
664                                                 struct btrfs_delayed_node *node,
665                                                 bool qgroup_free)
666 {
667         struct btrfs_block_rsv *rsv;
668
669         if (!node->bytes_reserved)
670                 return;
671
672         rsv = &fs_info->delayed_block_rsv;
673         trace_btrfs_space_reservation(fs_info, "delayed_inode",
674                                       node->inode_id, node->bytes_reserved, 0);
675         btrfs_block_rsv_release(fs_info, rsv,
676                                 node->bytes_reserved);
677         if (qgroup_free)
678                 btrfs_qgroup_free_meta_prealloc(node->root,
679                                 node->bytes_reserved);
680         else
681                 btrfs_qgroup_convert_reserved_meta(node->root,
682                                 node->bytes_reserved);
683         node->bytes_reserved = 0;
684 }
685
686 /*
687  * This helper inserts as many continuous items as will fit into the free
688  * space of the same leaf.
689  */
690 static int btrfs_batch_insert_items(struct btrfs_root *root,
691                                     struct btrfs_path *path,
692                                     struct btrfs_delayed_item *item)
693 {
694         struct btrfs_delayed_item *curr, *next;
695         int free_space;
696         int total_data_size = 0, total_size = 0;
697         struct extent_buffer *leaf;
698         char *data_ptr;
699         struct btrfs_key *keys;
700         u32 *data_size;
701         struct list_head head;
702         int slot;
703         int nitems;
704         int i;
705         int ret = 0;
706
707         BUG_ON(!path->nodes[0]);
708
709         leaf = path->nodes[0];
710         free_space = btrfs_leaf_free_space(leaf);
711         INIT_LIST_HEAD(&head);
712
713         next = item;
714         nitems = 0;
715
716         /*
717          * Count the number of continuous items that we can insert in one batch.
718          */
719         while (total_size + next->data_len + sizeof(struct btrfs_item) <=
720                free_space) {
721                 total_data_size += next->data_len;
722                 total_size += next->data_len + sizeof(struct btrfs_item);
723                 list_add_tail(&next->tree_list, &head);
724                 nitems++;
725
726                 curr = next;
727                 next = __btrfs_next_delayed_item(curr);
728                 if (!next)
729                         break;
730
731                 if (!btrfs_is_continuous_delayed_item(curr, next))
732                         break;
733         }
734
735         if (!nitems) {
736                 ret = 0;
737                 goto out;
738         }
739
740         /*
741          * We need to allocate some memory, which might cause the task to
742          * sleep, so we set all the locked nodes in the path to blocking
743          * locks first.
744          */
745         btrfs_set_path_blocking(path);
746
747         keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
748         if (!keys) {
749                 ret = -ENOMEM;
750                 goto out;
751         }
752
753         data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
754         if (!data_size) {
755                 ret = -ENOMEM;
756                 goto error;
757         }
758
759         /* get keys of all the delayed items */
760         i = 0;
761         list_for_each_entry(next, &head, tree_list) {
762                 keys[i] = next->key;
763                 data_size[i] = next->data_len;
764                 i++;
765         }
766
767         /* insert the keys of the items */
768         setup_items_for_insert(root, path, keys, data_size,
769                                total_data_size, total_size, nitems);
770
771         /* insert the dir index items */
772         slot = path->slots[0];
773         list_for_each_entry_safe(curr, next, &head, tree_list) {
774                 data_ptr = btrfs_item_ptr(leaf, slot, char);
775                 write_extent_buffer(leaf, &curr->data,
776                                     (unsigned long)data_ptr,
777                                     curr->data_len);
778                 slot++;
779
780                 btrfs_delayed_item_release_metadata(root, curr);
781
782                 list_del(&curr->tree_list);
783                 btrfs_release_delayed_item(curr);
784         }
785
786 error:
787         kfree(data_size);
788         kfree(keys);
789 out:
790         return ret;
791 }
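/*
 * Worked example for the batching above (illustrative numbers only): three
 * continuous dir index items carrying 40 bytes of data each consume
 * 3 * (40 + sizeof(struct btrfs_item)) bytes of leaf space, so the loop stops
 * collecting items as soon as the next one would push total_size past the
 * leaf's free space.
 */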
792
793 /*
794  * This helper only handles simple insertions that don't need to extend an
795  * item with new data, such as directory name index and inode insertions.
796  */
797 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
798                                      struct btrfs_root *root,
799                                      struct btrfs_path *path,
800                                      struct btrfs_delayed_item *delayed_item)
801 {
802         struct extent_buffer *leaf;
803         char *ptr;
804         int ret;
805
806         ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
807                                       delayed_item->data_len);
808         if (ret < 0 && ret != -EEXIST)
809                 return ret;
810
811         leaf = path->nodes[0];
812
813         ptr = btrfs_item_ptr(leaf, path->slots[0], char);
814
815         write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
816                             delayed_item->data_len);
817         btrfs_mark_buffer_dirty(leaf);
818
819         btrfs_delayed_item_release_metadata(root, delayed_item);
820         return 0;
821 }
822
823 /*
824  * We insert one item first and then, if there are continuous items, try to
825  * insert them into the same leaf.
826  */
827 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
828                                       struct btrfs_path *path,
829                                       struct btrfs_root *root,
830                                       struct btrfs_delayed_node *node)
831 {
832         struct btrfs_delayed_item *curr, *prev;
833         int ret = 0;
834
835 do_again:
836         mutex_lock(&node->mutex);
837         curr = __btrfs_first_delayed_insertion_item(node);
838         if (!curr)
839                 goto insert_end;
840
841         ret = btrfs_insert_delayed_item(trans, root, path, curr);
842         if (ret < 0) {
843                 btrfs_release_path(path);
844                 goto insert_end;
845         }
846
847         prev = curr;
848         curr = __btrfs_next_delayed_item(prev);
849         if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
850                 /* insert the continuous items into the same leaf */
851                 path->slots[0]++;
852                 btrfs_batch_insert_items(root, path, curr);
853         }
854         btrfs_release_delayed_item(prev);
855         btrfs_mark_buffer_dirty(path->nodes[0]);
856
857         btrfs_release_path(path);
858         mutex_unlock(&node->mutex);
859         goto do_again;
860
861 insert_end:
862         mutex_unlock(&node->mutex);
863         return ret;
864 }
865
866 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
867                                     struct btrfs_root *root,
868                                     struct btrfs_path *path,
869                                     struct btrfs_delayed_item *item)
870 {
871         struct btrfs_delayed_item *curr, *next;
872         struct extent_buffer *leaf;
873         struct btrfs_key key;
874         struct list_head head;
875         int nitems, i, last_item;
876         int ret = 0;
877
878         BUG_ON(!path->nodes[0]);
879
880         leaf = path->nodes[0];
881
882         i = path->slots[0];
883         last_item = btrfs_header_nritems(leaf) - 1;
884         if (i > last_item)
885                 return -ENOENT; /* FIXME: Is errno suitable? */
886
887         next = item;
888         INIT_LIST_HEAD(&head);
889         btrfs_item_key_to_cpu(leaf, &key, i);
890         nitems = 0;
891         /*
892          * Count the number of dir index items that we can delete in one batch.
893          */
894         while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
895                 list_add_tail(&next->tree_list, &head);
896                 nitems++;
897
898                 curr = next;
899                 next = __btrfs_next_delayed_item(curr);
900                 if (!next)
901                         break;
902
903                 if (!btrfs_is_continuous_delayed_item(curr, next))
904                         break;
905
906                 i++;
907                 if (i > last_item)
908                         break;
909                 btrfs_item_key_to_cpu(leaf, &key, i);
910         }
911
912         if (!nitems)
913                 return 0;
914
915         ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
916         if (ret)
917                 goto out;
918
919         list_for_each_entry_safe(curr, next, &head, tree_list) {
920                 btrfs_delayed_item_release_metadata(root, curr);
921                 list_del(&curr->tree_list);
922                 btrfs_release_delayed_item(curr);
923         }
924
925 out:
926         return ret;
927 }
928
929 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
930                                       struct btrfs_path *path,
931                                       struct btrfs_root *root,
932                                       struct btrfs_delayed_node *node)
933 {
934         struct btrfs_delayed_item *curr, *prev;
935         int ret = 0;
936
937 do_again:
938         mutex_lock(&node->mutex);
939         curr = __btrfs_first_delayed_deletion_item(node);
940         if (!curr)
941                 goto delete_fail;
942
943         ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
944         if (ret < 0)
945                 goto delete_fail;
946         else if (ret > 0) {
947                 /*
948                  * We can't find the item this delayed item points to, so the
949                  * delayed item is invalid; just drop it.
950                  */
951                 prev = curr;
952                 curr = __btrfs_next_delayed_item(prev);
953                 btrfs_release_delayed_item(prev);
954                 ret = 0;
955                 btrfs_release_path(path);
956                 if (curr) {
957                         mutex_unlock(&node->mutex);
958                         goto do_again;
959                 } else
960                         goto delete_fail;
961         }
962
963         btrfs_batch_delete_items(trans, root, path, curr);
964         btrfs_release_path(path);
965         mutex_unlock(&node->mutex);
966         goto do_again;
967
968 delete_fail:
969         btrfs_release_path(path);
970         mutex_unlock(&node->mutex);
971         return ret;
972 }
973
974 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
975 {
976         struct btrfs_delayed_root *delayed_root;
977
978         if (delayed_node &&
979             test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
980                 BUG_ON(!delayed_node->root);
981                 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
982                 delayed_node->count--;
983
984                 delayed_root = delayed_node->root->fs_info->delayed_root;
985                 finish_one_item(delayed_root);
986         }
987 }
988
989 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
990 {
991         struct btrfs_delayed_root *delayed_root;
992
993         ASSERT(delayed_node->root);
994         clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
995         delayed_node->count--;
996
997         delayed_root = delayed_node->root->fs_info->delayed_root;
998         finish_one_item(delayed_root);
999 }
1000
1001 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1002                                         struct btrfs_root *root,
1003                                         struct btrfs_path *path,
1004                                         struct btrfs_delayed_node *node)
1005 {
1006         struct btrfs_fs_info *fs_info = root->fs_info;
1007         struct btrfs_key key;
1008         struct btrfs_inode_item *inode_item;
1009         struct extent_buffer *leaf;
1010         int mod;
1011         int ret;
1012
1013         key.objectid = node->inode_id;
1014         key.type = BTRFS_INODE_ITEM_KEY;
1015         key.offset = 0;
1016
1017         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1018                 mod = -1;
1019         else
1020                 mod = 1;
1021
1022         ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1023         if (ret > 0) {
1024                 btrfs_release_path(path);
1025                 return -ENOENT;
1026         } else if (ret < 0) {
1027                 return ret;
1028         }
1029
1030         leaf = path->nodes[0];
1031         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1032                                     struct btrfs_inode_item);
1033         write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1034                             sizeof(struct btrfs_inode_item));
1035         btrfs_mark_buffer_dirty(leaf);
1036
1037         if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1038                 goto no_iref;
1039
1040         path->slots[0]++;
1041         if (path->slots[0] >= btrfs_header_nritems(leaf))
1042                 goto search;
1043 again:
1044         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1045         if (key.objectid != node->inode_id)
1046                 goto out;
1047
1048         if (key.type != BTRFS_INODE_REF_KEY &&
1049             key.type != BTRFS_INODE_EXTREF_KEY)
1050                 goto out;
1051
1052         /*
1053          * Delayed iref deletion is for an inode that has only one link, so
1054          * there is only one iref. The case of several irefs stored in the
1055          * same item does not occur.
1056          */
1057         btrfs_del_item(trans, root, path);
1058 out:
1059         btrfs_release_delayed_iref(node);
1060 no_iref:
1061         btrfs_release_path(path);
1062 err_out:
1063         btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
1064         btrfs_release_delayed_inode(node);
1065
1066         return ret;
1067
1068 search:
1069         btrfs_release_path(path);
1070
1071         key.type = BTRFS_INODE_EXTREF_KEY;
1072         key.offset = -1;
1073         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1074         if (ret < 0)
1075                 goto err_out;
1076         ASSERT(ret);
1077
1078         ret = 0;
1079         leaf = path->nodes[0];
1080         path->slots[0]--;
1081         goto again;
1082 }
1083
1084 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1085                                              struct btrfs_root *root,
1086                                              struct btrfs_path *path,
1087                                              struct btrfs_delayed_node *node)
1088 {
1089         int ret;
1090
1091         mutex_lock(&node->mutex);
1092         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1093                 mutex_unlock(&node->mutex);
1094                 return 0;
1095         }
1096
1097         ret = __btrfs_update_delayed_inode(trans, root, path, node);
1098         mutex_unlock(&node->mutex);
1099         return ret;
1100 }
1101
1102 static inline int
1103 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1104                                    struct btrfs_path *path,
1105                                    struct btrfs_delayed_node *node)
1106 {
1107         int ret;
1108
1109         ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1110         if (ret)
1111                 return ret;
1112
1113         ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1114         if (ret)
1115                 return ret;
1116
1117         ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1118         return ret;
1119 }
1120
1121 /*
1122  * Called when committing the transaction.
1123  * Returns 0 on success.
1124  * Returns < 0 on error; in that case the transaction has been aborted and
1125  * any outstanding delayed items are cleaned up.
1126  */
1127 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
1128 {
1129         struct btrfs_fs_info *fs_info = trans->fs_info;
1130         struct btrfs_delayed_root *delayed_root;
1131         struct btrfs_delayed_node *curr_node, *prev_node;
1132         struct btrfs_path *path;
1133         struct btrfs_block_rsv *block_rsv;
1134         int ret = 0;
1135         bool count = (nr > 0);
1136
1137         if (trans->aborted)
1138                 return -EIO;
1139
1140         path = btrfs_alloc_path();
1141         if (!path)
1142                 return -ENOMEM;
1143         path->leave_spinning = 1;
1144
1145         block_rsv = trans->block_rsv;
1146         trans->block_rsv = &fs_info->delayed_block_rsv;
1147
1148         delayed_root = fs_info->delayed_root;
1149
1150         curr_node = btrfs_first_delayed_node(delayed_root);
1151         while (curr_node && (!count || (count && nr--))) {
1152                 ret = __btrfs_commit_inode_delayed_items(trans, path,
1153                                                          curr_node);
1154                 if (ret) {
1155                         btrfs_release_delayed_node(curr_node);
1156                         curr_node = NULL;
1157                         btrfs_abort_transaction(trans, ret);
1158                         break;
1159                 }
1160
1161                 prev_node = curr_node;
1162                 curr_node = btrfs_next_delayed_node(curr_node);
1163                 btrfs_release_delayed_node(prev_node);
1164         }
1165
1166         if (curr_node)
1167                 btrfs_release_delayed_node(curr_node);
1168         btrfs_free_path(path);
1169         trans->block_rsv = block_rsv;
1170
1171         return ret;
1172 }
1173
1174 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans)
1175 {
1176         return __btrfs_run_delayed_items(trans, -1);
1177 }
1178
1179 int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
1180 {
1181         return __btrfs_run_delayed_items(trans, nr);
1182 }
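/*
 * Note on the two wrappers above: btrfs_run_delayed_items() passes nr == -1,
 * which makes __btrfs_run_delayed_items() walk every delayed node, while
 * btrfs_run_delayed_items_nr() bounds the walk to at most nr delayed nodes.
 */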
1183
1184 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1185                                      struct btrfs_inode *inode)
1186 {
1187         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1188         struct btrfs_path *path;
1189         struct btrfs_block_rsv *block_rsv;
1190         int ret;
1191
1192         if (!delayed_node)
1193                 return 0;
1194
1195         mutex_lock(&delayed_node->mutex);
1196         if (!delayed_node->count) {
1197                 mutex_unlock(&delayed_node->mutex);
1198                 btrfs_release_delayed_node(delayed_node);
1199                 return 0;
1200         }
1201         mutex_unlock(&delayed_node->mutex);
1202
1203         path = btrfs_alloc_path();
1204         if (!path) {
1205                 btrfs_release_delayed_node(delayed_node);
1206                 return -ENOMEM;
1207         }
1208         path->leave_spinning = 1;
1209
1210         block_rsv = trans->block_rsv;
1211         trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1212
1213         ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1214
1215         btrfs_release_delayed_node(delayed_node);
1216         btrfs_free_path(path);
1217         trans->block_rsv = block_rsv;
1218
1219         return ret;
1220 }
1221
1222 int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
1223 {
1224         struct btrfs_fs_info *fs_info = inode->root->fs_info;
1225         struct btrfs_trans_handle *trans;
1226         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1227         struct btrfs_path *path;
1228         struct btrfs_block_rsv *block_rsv;
1229         int ret;
1230
1231         if (!delayed_node)
1232                 return 0;
1233
1234         mutex_lock(&delayed_node->mutex);
1235         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1236                 mutex_unlock(&delayed_node->mutex);
1237                 btrfs_release_delayed_node(delayed_node);
1238                 return 0;
1239         }
1240         mutex_unlock(&delayed_node->mutex);
1241
1242         trans = btrfs_join_transaction(delayed_node->root);
1243         if (IS_ERR(trans)) {
1244                 ret = PTR_ERR(trans);
1245                 goto out;
1246         }
1247
1248         path = btrfs_alloc_path();
1249         if (!path) {
1250                 ret = -ENOMEM;
1251                 goto trans_out;
1252         }
1253         path->leave_spinning = 1;
1254
1255         block_rsv = trans->block_rsv;
1256         trans->block_rsv = &fs_info->delayed_block_rsv;
1257
1258         mutex_lock(&delayed_node->mutex);
1259         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1260                 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1261                                                    path, delayed_node);
1262         else
1263                 ret = 0;
1264         mutex_unlock(&delayed_node->mutex);
1265
1266         btrfs_free_path(path);
1267         trans->block_rsv = block_rsv;
1268 trans_out:
1269         btrfs_end_transaction(trans);
1270         btrfs_btree_balance_dirty(fs_info);
1271 out:
1272         btrfs_release_delayed_node(delayed_node);
1273
1274         return ret;
1275 }
1276
1277 void btrfs_remove_delayed_node(struct btrfs_inode *inode)
1278 {
1279         struct btrfs_delayed_node *delayed_node;
1280
1281         delayed_node = READ_ONCE(inode->delayed_node);
1282         if (!delayed_node)
1283                 return;
1284
1285         inode->delayed_node = NULL;
1286         btrfs_release_delayed_node(delayed_node);
1287 }
1288
1289 struct btrfs_async_delayed_work {
1290         struct btrfs_delayed_root *delayed_root;
1291         int nr;
1292         struct btrfs_work work;
1293 };
1294
1295 static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1296 {
1297         struct btrfs_async_delayed_work *async_work;
1298         struct btrfs_delayed_root *delayed_root;
1299         struct btrfs_trans_handle *trans;
1300         struct btrfs_path *path;
1301         struct btrfs_delayed_node *delayed_node = NULL;
1302         struct btrfs_root *root;
1303         struct btrfs_block_rsv *block_rsv;
1304         int total_done = 0;
1305
1306         async_work = container_of(work, struct btrfs_async_delayed_work, work);
1307         delayed_root = async_work->delayed_root;
1308
1309         path = btrfs_alloc_path();
1310         if (!path)
1311                 goto out;
1312
1313         do {
1314                 if (atomic_read(&delayed_root->items) <
1315                     BTRFS_DELAYED_BACKGROUND / 2)
1316                         break;
1317
1318                 delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1319                 if (!delayed_node)
1320                         break;
1321
1322                 path->leave_spinning = 1;
1323                 root = delayed_node->root;
1324
1325                 trans = btrfs_join_transaction(root);
1326                 if (IS_ERR(trans)) {
1327                         btrfs_release_path(path);
1328                         btrfs_release_prepared_delayed_node(delayed_node);
1329                         total_done++;
1330                         continue;
1331                 }
1332
1333                 block_rsv = trans->block_rsv;
1334                 trans->block_rsv = &root->fs_info->delayed_block_rsv;
1335
1336                 __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1337
1338                 trans->block_rsv = block_rsv;
1339                 btrfs_end_transaction(trans);
1340                 btrfs_btree_balance_dirty_nodelay(root->fs_info);
1341
1342                 btrfs_release_path(path);
1343                 btrfs_release_prepared_delayed_node(delayed_node);
1344                 total_done++;
1345
1346         } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
1347                  || total_done < async_work->nr);
1348
1349         btrfs_free_path(path);
1350 out:
1351         wake_up(&delayed_root->wait);
1352         kfree(async_work);
1353 }
1354
1355
1356 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1357                                      struct btrfs_fs_info *fs_info, int nr)
1358 {
1359         struct btrfs_async_delayed_work *async_work;
1360
1361         async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1362         if (!async_work)
1363                 return -ENOMEM;
1364
1365         async_work->delayed_root = delayed_root;
1366         btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1367                         btrfs_async_run_delayed_root, NULL, NULL);
1368         async_work->nr = nr;
1369
1370         btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1371         return 0;
1372 }
1373
1374 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
1375 {
1376         WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
1377 }
1378
1379 static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1380 {
1381         int val = atomic_read(&delayed_root->items_seq);
1382
1383         if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1384                 return 1;
1385
1386         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1387                 return 1;
1388
1389         return 0;
1390 }
1391
1392 void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
1393 {
1394         struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
1395
1396         if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
1397                 btrfs_workqueue_normal_congested(fs_info->delayed_workers))
1398                 return;
1399
1400         if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1401                 int seq;
1402                 int ret;
1403
1404                 seq = atomic_read(&delayed_root->items_seq);
1405
1406                 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1407                 if (ret)
1408                         return;
1409
1410                 wait_event_interruptible(delayed_root->wait,
1411                                          could_end_wait(delayed_root, seq));
1412                 return;
1413         }
1414
1415         btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1416 }
1417
1418 /* Will return 0 or -ENOMEM */
1419 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1420                                    const char *name, int name_len,
1421                                    struct btrfs_inode *dir,
1422                                    struct btrfs_disk_key *disk_key, u8 type,
1423                                    u64 index)
1424 {
1425         struct btrfs_delayed_node *delayed_node;
1426         struct btrfs_delayed_item *delayed_item;
1427         struct btrfs_dir_item *dir_item;
1428         int ret;
1429
1430         delayed_node = btrfs_get_or_create_delayed_node(dir);
1431         if (IS_ERR(delayed_node))
1432                 return PTR_ERR(delayed_node);
1433
1434         delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1435         if (!delayed_item) {
1436                 ret = -ENOMEM;
1437                 goto release_node;
1438         }
1439
1440         delayed_item->key.objectid = btrfs_ino(dir);
1441         delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
1442         delayed_item->key.offset = index;
1443
1444         dir_item = (struct btrfs_dir_item *)delayed_item->data;
1445         dir_item->location = *disk_key;
1446         btrfs_set_stack_dir_transid(dir_item, trans->transid);
1447         btrfs_set_stack_dir_data_len(dir_item, 0);
1448         btrfs_set_stack_dir_name_len(dir_item, name_len);
1449         btrfs_set_stack_dir_type(dir_item, type);
1450         memcpy((char *)(dir_item + 1), name, name_len);
1451
1452         ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, delayed_item);
1453         /*
1454          * We reserved enough space when we started the transaction, so a
1455          * metadata reservation failure here is impossible.
1456          */
1457         BUG_ON(ret);
1458
1459         mutex_lock(&delayed_node->mutex);
1460         ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1461         if (unlikely(ret)) {
1462                 btrfs_err(trans->fs_info,
1463                           "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1464                           name_len, name, delayed_node->root->root_key.objectid,
1465                           delayed_node->inode_id, ret);
1466                 BUG();
1467         }
1468         mutex_unlock(&delayed_node->mutex);
1469
1470 release_node:
1471         btrfs_release_delayed_node(delayed_node);
1472         return ret;
1473 }
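/*
 * Hypothetical sketch of a caller of btrfs_insert_delayed_dir_index() above
 * (illustration only, local variable names made up): when a new name is
 * linked into a directory, the caller allocates the next free dir index for
 * the directory and then does roughly
 *
 *	ret = btrfs_insert_delayed_dir_index(trans, name, name_len, dir,
 *					     &disk_key, BTRFS_FT_REG_FILE,
 *					     index);
 *
 * so the dir index item is created later, in batch, instead of modifying the
 * b-tree immediately.
 */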
1474
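/*
 * Drop a not-yet-flushed insertion item matching @key, if one exists.
 * Returns 0 when a pending insertion was found and cancelled (so the caller
 * does not need to queue a deletion item at all), and 1 when nothing matching
 * was pending.
 */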
1475 static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
1476                                                struct btrfs_delayed_node *node,
1477                                                struct btrfs_key *key)
1478 {
1479         struct btrfs_delayed_item *item;
1480
1481         mutex_lock(&node->mutex);
1482         item = __btrfs_lookup_delayed_insertion_item(node, key);
1483         if (!item) {
1484                 mutex_unlock(&node->mutex);
1485                 return 1;
1486         }
1487
1488         btrfs_delayed_item_release_metadata(node->root, item);
1489         btrfs_release_delayed_item(item);
1490         mutex_unlock(&node->mutex);
1491         return 0;
1492 }
1493
1494 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1495                                    struct btrfs_inode *dir, u64 index)
1496 {
1497         struct btrfs_delayed_node *node;
1498         struct btrfs_delayed_item *item;
1499         struct btrfs_key item_key;
1500         int ret;
1501
1502         node = btrfs_get_or_create_delayed_node(dir);
1503         if (IS_ERR(node))
1504                 return PTR_ERR(node);
1505
1506         item_key.objectid = btrfs_ino(dir);
1507         item_key.type = BTRFS_DIR_INDEX_KEY;
1508         item_key.offset = index;
1509
1510         ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
1511                                                   &item_key);
1512         if (!ret)
1513                 goto end;
1514
1515         item = btrfs_alloc_delayed_item(0);
1516         if (!item) {
1517                 ret = -ENOMEM;
1518                 goto end;
1519         }
1520
1521         item->key = item_key;
1522
1523         ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
1524         /*
1525          * We reserved enough space when we started a new transaction,
1526          * so a metadata reservation failure here is impossible.
1527          */
1528         BUG_ON(ret);
1529
1530         mutex_lock(&node->mutex);
1531         ret = __btrfs_add_delayed_deletion_item(node, item);
1532         if (unlikely(ret)) {
1533                 btrfs_err(trans->fs_info,
1534                           "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1535                           index, node->root->root_key.objectid,
1536                           node->inode_id, ret);
1537                 BUG();
1538         }
1539         mutex_unlock(&node->mutex);
1540 end:
1541         btrfs_release_delayed_node(node);
1542         return ret;
1543 }
1544
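/*
 * Copy the directory index counter cached in the delayed node into the
 * in-memory inode.  Returns -ENOENT if there is no delayed node and -EINVAL
 * if the delayed node has not recorded an index count.
 */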
1545 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
1546 {
1547         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1548
1549         if (!delayed_node)
1550                 return -ENOENT;
1551
1552         /*
1553          * Since we hold i_mutex of this directory, no new directory index
1554          * can be added to the delayed node and index_cnt cannot change
1555          * under us, so we need not lock the delayed node.
1556          */
1557         if (!delayed_node->index_cnt) {
1558                 btrfs_release_delayed_node(delayed_node);
1559                 return -EINVAL;
1560         }
1561
1562         inode->index_cnt = delayed_node->index_cnt;
1563         btrfs_release_delayed_node(delayed_node);
1564         return 0;
1565 }
1566
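/*
 * Take references on all pending insertion and deletion items of the
 * directory's delayed node so that readdir can merge them with the entries
 * found on disk.  Returns false if the inode has no delayed node.
 */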
1567 bool btrfs_readdir_get_delayed_items(struct inode *inode,
1568                                      struct list_head *ins_list,
1569                                      struct list_head *del_list)
1570 {
1571         struct btrfs_delayed_node *delayed_node;
1572         struct btrfs_delayed_item *item;
1573
1574         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1575         if (!delayed_node)
1576                 return false;
1577
1578         /*
1579          * We can only do one readdir with delayed items at a time because of
1580          * item->readdir_list.
1581          */
1582         inode_unlock_shared(inode);
1583         inode_lock(inode);
1584
1585         mutex_lock(&delayed_node->mutex);
1586         item = __btrfs_first_delayed_insertion_item(delayed_node);
1587         while (item) {
1588                 refcount_inc(&item->refs);
1589                 list_add_tail(&item->readdir_list, ins_list);
1590                 item = __btrfs_next_delayed_item(item);
1591         }
1592
1593         item = __btrfs_first_delayed_deletion_item(delayed_node);
1594         while (item) {
1595                 refcount_inc(&item->refs);
1596                 list_add_tail(&item->readdir_list, del_list);
1597                 item = __btrfs_next_delayed_item(item);
1598         }
1599         mutex_unlock(&delayed_node->mutex);
1600         /*
1601          * This delayed node is still cached in the btrfs inode, so refs
1602          * must be > 1 now, and we need not check whether it is about to
1603          * be freed.
1604          *
1605          * Besides that, this function is only used for readdir; we do not
1606          * insert or delete delayed items during this period, so we also
1607          * need not requeue or dequeue this delayed node.
1608          */
1609         refcount_dec(&delayed_node->refs);
1610
1611         return true;
1612 }
1613
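/*
 * Drop the item references taken by btrfs_readdir_get_delayed_items() and
 * free any item whose refcount reaches zero.
 */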
1614 void btrfs_readdir_put_delayed_items(struct inode *inode,
1615                                      struct list_head *ins_list,
1616                                      struct list_head *del_list)
1617 {
1618         struct btrfs_delayed_item *curr, *next;
1619
1620         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1621                 list_del(&curr->readdir_list);
1622                 if (refcount_dec_and_test(&curr->refs))
1623                         kfree(curr);
1624         }
1625
1626         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1627                 list_del(&curr->readdir_list);
1628                 if (refcount_dec_and_test(&curr->refs))
1629                         kfree(curr);
1630         }
1631
1632         /*
1633          * The VFS is going to do up_read(), so we need to downgrade back to a
1634          * read lock.
1635          */
1636         downgrade_write(&inode->i_rwsem);
1637 }
1638
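/*
 * Return 1 if the given dir index is covered by a pending deletion item and
 * must therefore be skipped by readdir, 0 otherwise.
 */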
1639 int btrfs_should_delete_dir_index(struct list_head *del_list,
1640                                   u64 index)
1641 {
1642         struct btrfs_delayed_item *curr;
1643         int ret = 0;
1644
1645         list_for_each_entry(curr, del_list, readdir_list) {
1646                 if (curr->key.offset > index)
1647                         break;
1648                 if (curr->key.offset == index) {
1649                         ret = 1;
1650                         break;
1651                 }
1652         }
1653         return ret;
1654 }
1655
1656 /*
1657  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1658  * and emit it through the given dir_context.
1659  */
1660 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1661                                     struct list_head *ins_list)
1662 {
1663         struct btrfs_dir_item *di;
1664         struct btrfs_delayed_item *curr, *next;
1665         struct btrfs_key location;
1666         char *name;
1667         int name_len;
1668         int over = 0;
1669         unsigned char d_type;
1670
1671         if (list_empty(ins_list))
1672                 return 0;
1673
1674         /*
1675          * The data of the delayed items can no longer change, so we need
1676          * not lock them.  And since we hold i_mutex of the directory,
1677          * nobody can delete any directory index now.
1678          */
1679         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1680                 list_del(&curr->readdir_list);
1681
1682                 if (curr->key.offset < ctx->pos) {
1683                         if (refcount_dec_and_test(&curr->refs))
1684                                 kfree(curr);
1685                         continue;
1686                 }
1687
1688                 ctx->pos = curr->key.offset;
1689
1690                 di = (struct btrfs_dir_item *)curr->data;
1691                 name = (char *)(di + 1);
1692                 name_len = btrfs_stack_dir_name_len(di);
1693
1694                 d_type = fs_ftype_to_dtype(di->type);
1695                 btrfs_disk_key_to_cpu(&location, &di->location);
1696
1697                 over = !dir_emit(ctx, name, name_len,
1698                                location.objectid, d_type);
1699
1700                 if (refcount_dec_and_test(&curr->refs))
1701                         kfree(curr);
1702
1703                 if (over)
1704                         return 1;
1705                 ctx->pos++;
1706         }
1707         return 0;
1708 }
1709
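/*
 * Copy the in-memory inode state into the stack inode item stored in the
 * delayed node.
 */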
1710 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1711                                   struct btrfs_inode_item *inode_item,
1712                                   struct inode *inode)
1713 {
1714         btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1715         btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1716         btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1717         btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1718         btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1719         btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1720         btrfs_set_stack_inode_generation(inode_item,
1721                                          BTRFS_I(inode)->generation);
1722         btrfs_set_stack_inode_sequence(inode_item,
1723                                        inode_peek_iversion(inode));
1724         btrfs_set_stack_inode_transid(inode_item, trans->transid);
1725         btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1726         btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1727         btrfs_set_stack_inode_block_group(inode_item, 0);
1728
1729         btrfs_set_stack_timespec_sec(&inode_item->atime,
1730                                      inode->i_atime.tv_sec);
1731         btrfs_set_stack_timespec_nsec(&inode_item->atime,
1732                                       inode->i_atime.tv_nsec);
1733
1734         btrfs_set_stack_timespec_sec(&inode_item->mtime,
1735                                      inode->i_mtime.tv_sec);
1736         btrfs_set_stack_timespec_nsec(&inode_item->mtime,
1737                                       inode->i_mtime.tv_nsec);
1738
1739         btrfs_set_stack_timespec_sec(&inode_item->ctime,
1740                                      inode->i_ctime.tv_sec);
1741         btrfs_set_stack_timespec_nsec(&inode_item->ctime,
1742                                       inode->i_ctime.tv_nsec);
1743
1744         btrfs_set_stack_timespec_sec(&inode_item->otime,
1745                                      BTRFS_I(inode)->i_otime.tv_sec);
1746         btrfs_set_stack_timespec_nsec(&inode_item->otime,
1747                                      BTRFS_I(inode)->i_otime.tv_nsec);
1748 }
1749
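/*
 * Fill the VFS inode from the inode item cached in the delayed node, which
 * may be newer than the copy on disk.  Returns -ENOENT if the delayed node
 * does not hold a dirty inode item.
 */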
1750 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1751 {
1752         struct btrfs_delayed_node *delayed_node;
1753         struct btrfs_inode_item *inode_item;
1754
1755         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1756         if (!delayed_node)
1757                 return -ENOENT;
1758
1759         mutex_lock(&delayed_node->mutex);
1760         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1761                 mutex_unlock(&delayed_node->mutex);
1762                 btrfs_release_delayed_node(delayed_node);
1763                 return -ENOENT;
1764         }
1765
1766         inode_item = &delayed_node->inode_item;
1767
1768         i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1769         i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1770         btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
1771         inode->i_mode = btrfs_stack_inode_mode(inode_item);
1772         set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1773         inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1774         BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1775         BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1776
1777         inode_set_iversion_queried(inode,
1778                                    btrfs_stack_inode_sequence(inode_item));
1779         inode->i_rdev = 0;
1780         *rdev = btrfs_stack_inode_rdev(inode_item);
1781         BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1782
1783         inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
1784         inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
1785
1786         inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
1787         inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
1788
1789         inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
1790         inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
1791
1792         BTRFS_I(inode)->i_otime.tv_sec =
1793                 btrfs_stack_timespec_sec(&inode_item->otime);
1794         BTRFS_I(inode)->i_otime.tv_nsec =
1795                 btrfs_stack_timespec_nsec(&inode_item->otime);
1796
1797         inode->i_generation = BTRFS_I(inode)->generation;
1798         BTRFS_I(inode)->index_cnt = (u64)-1;
1799
1800         mutex_unlock(&delayed_node->mutex);
1801         btrfs_release_delayed_node(delayed_node);
1802         return 0;
1803 }
1804
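/*
 * Stash the current inode state in the delayed node instead of updating the
 * inode item in the fs tree immediately.  Metadata space is reserved the
 * first time the cached copy is dirtied.
 */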
1805 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1806                                struct btrfs_root *root, struct inode *inode)
1807 {
1808         struct btrfs_delayed_node *delayed_node;
1809         int ret = 0;
1810
1811         delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
1812         if (IS_ERR(delayed_node))
1813                 return PTR_ERR(delayed_node);
1814
1815         mutex_lock(&delayed_node->mutex);
1816         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1817                 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1818                 goto release_node;
1819         }
1820
1821         ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
1822                                                    delayed_node);
1823         if (ret)
1824                 goto release_node;
1825
1826         fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1827         set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1828         delayed_node->count++;
1829         atomic_inc(&root->fs_info->delayed_root->items);
1830 release_node:
1831         mutex_unlock(&delayed_node->mutex);
1832         btrfs_release_delayed_node(delayed_node);
1833         return ret;
1834 }
1835
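/*
 * Queue the deletion of the inode's single inode ref in the delayed node.
 * Returns -EAGAIN during log recovery, where delayed inode updates are not
 * used.
 */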
1836 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
1837 {
1838         struct btrfs_fs_info *fs_info = inode->root->fs_info;
1839         struct btrfs_delayed_node *delayed_node;
1840
1841         /*
1842          * We don't do delayed inode updates during log recovery because it
1843          * leads to enospc problems.  This means we also can't do delayed
1844          * inode refs.
1845          */
1846         if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
1847                 return -EAGAIN;
1848
1849         delayed_node = btrfs_get_or_create_delayed_node(inode);
1850         if (IS_ERR(delayed_node))
1851                 return PTR_ERR(delayed_node);
1852
1853         /*
1854          * We don't reserve space for inode ref deletion because:
1855          * - We ONLY do async inode ref deletion for an inode that has only
1856          *   one link (i_nlink == 1), which means there is only one inode ref.
1857          *   In most cases the inode ref and the inode item are in the same
1858          *   leaf, and we deal with them at the same time.  Since we are
1859          *   sure we will reserve space for the inode item, it is
1860          *   unnecessary to reserve space for inode ref deletion as well.
1861          * - If the inode ref and the inode item are not in the same leaf,
1862          *   we still needn't worry about enospc, because we reserve much
1863          *   more space for the inode update than it needs.
1864          * - In the worst case, we can steal some space from the global
1865          *   reservation.  This is very rare.
1866          */
1867         mutex_lock(&delayed_node->mutex);
1868         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1869                 goto release_node;
1870
1871         set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1872         delayed_node->count++;
1873         atomic_inc(&fs_info->delayed_root->items);
1874 release_node:
1875         mutex_unlock(&delayed_node->mutex);
1876         btrfs_release_delayed_node(delayed_node);
1877         return 0;
1878 }
1879
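/*
 * Throw away everything queued in a delayed node: pending insertion and
 * deletion items, a queued inode ref deletion and a dirty inode item,
 * releasing the metadata space reserved for each of them.
 */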
1880 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1881 {
1882         struct btrfs_root *root = delayed_node->root;
1883         struct btrfs_fs_info *fs_info = root->fs_info;
1884         struct btrfs_delayed_item *curr_item, *prev_item;
1885
1886         mutex_lock(&delayed_node->mutex);
1887         curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1888         while (curr_item) {
1889                 btrfs_delayed_item_release_metadata(root, curr_item);
1890                 prev_item = curr_item;
1891                 curr_item = __btrfs_next_delayed_item(prev_item);
1892                 btrfs_release_delayed_item(prev_item);
1893         }
1894
1895         curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1896         while (curr_item) {
1897                 btrfs_delayed_item_release_metadata(root, curr_item);
1898                 prev_item = curr_item;
1899                 curr_item = __btrfs_next_delayed_item(prev_item);
1900                 btrfs_release_delayed_item(prev_item);
1901         }
1902
1903         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1904                 btrfs_release_delayed_iref(delayed_node);
1905
1906         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1907                 btrfs_delayed_inode_release_metadata(fs_info, delayed_node, false);
1908                 btrfs_release_delayed_inode(delayed_node);
1909         }
1910         mutex_unlock(&delayed_node->mutex);
1911 }
1912
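/*
 * Drop all delayed work queued for a single inode.
 */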
1913 void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
1914 {
1915         struct btrfs_delayed_node *delayed_node;
1916
1917         delayed_node = btrfs_get_delayed_node(inode);
1918         if (!delayed_node)
1919                 return;
1920
1921         __btrfs_kill_delayed_node(delayed_node);
1922         btrfs_release_delayed_node(delayed_node);
1923 }
1924
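/*
 * Walk the root's radix tree of delayed nodes in batches and discard the
 * pending work of every node found.
 */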
1925 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1926 {
1927         u64 inode_id = 0;
1928         struct btrfs_delayed_node *delayed_nodes[8];
1929         int i, n;
1930
1931         while (1) {
1932                 spin_lock(&root->inode_lock);
1933                 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1934                                            (void **)delayed_nodes, inode_id,
1935                                            ARRAY_SIZE(delayed_nodes));
1936                 if (!n) {
1937                         spin_unlock(&root->inode_lock);
1938                         break;
1939                 }
1940
1941                 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1942
1943                 for (i = 0; i < n; i++)
1944                         refcount_inc(&delayed_nodes[i]->refs);
1945                 spin_unlock(&root->inode_lock);
1946
1947                 for (i = 0; i < n; i++) {
1948                         __btrfs_kill_delayed_node(delayed_nodes[i]);
1949                         btrfs_release_delayed_node(delayed_nodes[i]);
1950                 }
1951         }
1952 }
1953
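/*
 * Walk the fs-wide list of delayed nodes and discard all of their pending
 * work.
 */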
1954 void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
1955 {
1956         struct btrfs_delayed_node *curr_node, *prev_node;
1957
1958         curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
1959         while (curr_node) {
1960                 __btrfs_kill_delayed_node(curr_node);
1961
1962                 prev_node = curr_node;
1963                 curr_node = btrfs_next_delayed_node(curr_node);
1964                 btrfs_release_delayed_node(prev_node);
1965         }
1966 }
1967