btrfs: drop btrfs_device::can_discard to query directly
[linux-block.git] / fs / btrfs / delayed-inode.c
1 /*
2  * Copyright (C) 2011 Fujitsu.  All rights reserved.
3  * Written by Miao Xie <miaox@cn.fujitsu.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public
7  * License v2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public
15  * License along with this program; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 02111-1307, USA.
18  */
19
20 #include <linux/slab.h>
21 #include "delayed-inode.h"
22 #include "disk-io.h"
23 #include "transaction.h"
24 #include "ctree.h"
25
/*
 * NOTE(review): not referenced in the part of the file reviewed here;
 * presumably an upper bound on pending delayed items - confirm at the users.
 */
#define BTRFS_DELAYED_WRITEBACK         512
/* finish_one_item() wakes waiters once the pending item count is below this */
#define BTRFS_DELAYED_BACKGROUND        128
/*
 * finish_one_item() also wakes waiters whenever the item sequence number is
 * a multiple of this
 */
#define BTRFS_DELAYED_BATCH             16

/* slab cache that every struct btrfs_delayed_node is allocated from */
static struct kmem_cache *delayed_node_cache;
31
32 int __init btrfs_delayed_inode_init(void)
33 {
34         delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
35                                         sizeof(struct btrfs_delayed_node),
36                                         0,
37                                         SLAB_MEM_SPREAD,
38                                         NULL);
39         if (!delayed_node_cache)
40                 return -ENOMEM;
41         return 0;
42 }
43
44 void btrfs_delayed_inode_exit(void)
45 {
46         kmem_cache_destroy(delayed_node_cache);
47 }
48
49 static inline void btrfs_init_delayed_node(
50                                 struct btrfs_delayed_node *delayed_node,
51                                 struct btrfs_root *root, u64 inode_id)
52 {
53         delayed_node->root = root;
54         delayed_node->inode_id = inode_id;
55         refcount_set(&delayed_node->refs, 0);
56         delayed_node->ins_root = RB_ROOT;
57         delayed_node->del_root = RB_ROOT;
58         mutex_init(&delayed_node->mutex);
59         INIT_LIST_HEAD(&delayed_node->n_list);
60         INIT_LIST_HEAD(&delayed_node->p_list);
61 }
62
63 static inline int btrfs_is_continuous_delayed_item(
64                                         struct btrfs_delayed_item *item1,
65                                         struct btrfs_delayed_item *item2)
66 {
67         if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
68             item1->key.objectid == item2->key.objectid &&
69             item1->key.type == item2->key.type &&
70             item1->key.offset + 1 == item2->key.offset)
71                 return 1;
72         return 0;
73 }
74
75 static struct btrfs_delayed_node *btrfs_get_delayed_node(
76                 struct btrfs_inode *btrfs_inode)
77 {
78         struct btrfs_root *root = btrfs_inode->root;
79         u64 ino = btrfs_ino(btrfs_inode);
80         struct btrfs_delayed_node *node;
81
82         node = READ_ONCE(btrfs_inode->delayed_node);
83         if (node) {
84                 refcount_inc(&node->refs);
85                 return node;
86         }
87
88         spin_lock(&root->inode_lock);
89         node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
90
91         if (node) {
92                 if (btrfs_inode->delayed_node) {
93                         refcount_inc(&node->refs);      /* can be accessed */
94                         BUG_ON(btrfs_inode->delayed_node != node);
95                         spin_unlock(&root->inode_lock);
96                         return node;
97                 }
98
99                 /*
100                  * It's possible that we're racing into the middle of removing
101                  * this node from the radix tree.  In this case, the refcount
102                  * was zero and it should never go back to one.  Just return
103                  * NULL like it was never in the radix at all; our release
104                  * function is in the process of removing it.
105                  *
106                  * Some implementations of refcount_inc refuse to bump the
107                  * refcount once it has hit zero.  If we don't do this dance
108                  * here, refcount_inc() may decide to just WARN_ONCE() instead
109                  * of actually bumping the refcount.
110                  *
111                  * If this node is properly in the radix, we want to bump the
112                  * refcount twice, once for the inode and once for this get
113                  * operation.
114                  */
115                 if (refcount_inc_not_zero(&node->refs)) {
116                         refcount_inc(&node->refs);
117                         btrfs_inode->delayed_node = node;
118                 } else {
119                         node = NULL;
120                 }
121
122                 spin_unlock(&root->inode_lock);
123                 return node;
124         }
125         spin_unlock(&root->inode_lock);
126
127         return NULL;
128 }
129
130 /* Will return either the node or PTR_ERR(-ENOMEM) */
131 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
132                 struct btrfs_inode *btrfs_inode)
133 {
134         struct btrfs_delayed_node *node;
135         struct btrfs_root *root = btrfs_inode->root;
136         u64 ino = btrfs_ino(btrfs_inode);
137         int ret;
138
139 again:
140         node = btrfs_get_delayed_node(btrfs_inode);
141         if (node)
142                 return node;
143
144         node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
145         if (!node)
146                 return ERR_PTR(-ENOMEM);
147         btrfs_init_delayed_node(node, root, ino);
148
149         /* cached in the btrfs inode and can be accessed */
150         refcount_set(&node->refs, 2);
151
152         ret = radix_tree_preload(GFP_NOFS);
153         if (ret) {
154                 kmem_cache_free(delayed_node_cache, node);
155                 return ERR_PTR(ret);
156         }
157
158         spin_lock(&root->inode_lock);
159         ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
160         if (ret == -EEXIST) {
161                 spin_unlock(&root->inode_lock);
162                 kmem_cache_free(delayed_node_cache, node);
163                 radix_tree_preload_end();
164                 goto again;
165         }
166         btrfs_inode->delayed_node = node;
167         spin_unlock(&root->inode_lock);
168         radix_tree_preload_end();
169
170         return node;
171 }
172
173 /*
174  * Call it when holding delayed_node->mutex
175  *
176  * If mod = 1, add this node into the prepared list.
177  */
178 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
179                                      struct btrfs_delayed_node *node,
180                                      int mod)
181 {
182         spin_lock(&root->lock);
183         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
184                 if (!list_empty(&node->p_list))
185                         list_move_tail(&node->p_list, &root->prepare_list);
186                 else if (mod)
187                         list_add_tail(&node->p_list, &root->prepare_list);
188         } else {
189                 list_add_tail(&node->n_list, &root->node_list);
190                 list_add_tail(&node->p_list, &root->prepare_list);
191                 refcount_inc(&node->refs);      /* inserted into list */
192                 root->nodes++;
193                 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
194         }
195         spin_unlock(&root->lock);
196 }
197
198 /* Call it when holding delayed_node->mutex */
199 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
200                                        struct btrfs_delayed_node *node)
201 {
202         spin_lock(&root->lock);
203         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
204                 root->nodes--;
205                 refcount_dec(&node->refs);      /* not in the list */
206                 list_del_init(&node->n_list);
207                 if (!list_empty(&node->p_list))
208                         list_del_init(&node->p_list);
209                 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
210         }
211         spin_unlock(&root->lock);
212 }
213
214 static struct btrfs_delayed_node *btrfs_first_delayed_node(
215                         struct btrfs_delayed_root *delayed_root)
216 {
217         struct list_head *p;
218         struct btrfs_delayed_node *node = NULL;
219
220         spin_lock(&delayed_root->lock);
221         if (list_empty(&delayed_root->node_list))
222                 goto out;
223
224         p = delayed_root->node_list.next;
225         node = list_entry(p, struct btrfs_delayed_node, n_list);
226         refcount_inc(&node->refs);
227 out:
228         spin_unlock(&delayed_root->lock);
229
230         return node;
231 }
232
233 static struct btrfs_delayed_node *btrfs_next_delayed_node(
234                                                 struct btrfs_delayed_node *node)
235 {
236         struct btrfs_delayed_root *delayed_root;
237         struct list_head *p;
238         struct btrfs_delayed_node *next = NULL;
239
240         delayed_root = node->root->fs_info->delayed_root;
241         spin_lock(&delayed_root->lock);
242         if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
243                 /* not in the list */
244                 if (list_empty(&delayed_root->node_list))
245                         goto out;
246                 p = delayed_root->node_list.next;
247         } else if (list_is_last(&node->n_list, &delayed_root->node_list))
248                 goto out;
249         else
250                 p = node->n_list.next;
251
252         next = list_entry(p, struct btrfs_delayed_node, n_list);
253         refcount_inc(&next->refs);
254 out:
255         spin_unlock(&delayed_root->lock);
256
257         return next;
258 }
259
260 static void __btrfs_release_delayed_node(
261                                 struct btrfs_delayed_node *delayed_node,
262                                 int mod)
263 {
264         struct btrfs_delayed_root *delayed_root;
265
266         if (!delayed_node)
267                 return;
268
269         delayed_root = delayed_node->root->fs_info->delayed_root;
270
271         mutex_lock(&delayed_node->mutex);
272         if (delayed_node->count)
273                 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
274         else
275                 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
276         mutex_unlock(&delayed_node->mutex);
277
278         if (refcount_dec_and_test(&delayed_node->refs)) {
279                 struct btrfs_root *root = delayed_node->root;
280
281                 spin_lock(&root->inode_lock);
282                 /*
283                  * Once our refcount goes to zero, nobody is allowed to bump it
284                  * back up.  We can delete it now.
285                  */
286                 ASSERT(refcount_read(&delayed_node->refs) == 0);
287                 radix_tree_delete(&root->delayed_nodes_tree,
288                                   delayed_node->inode_id);
289                 spin_unlock(&root->inode_lock);
290                 kmem_cache_free(delayed_node_cache, delayed_node);
291         }
292 }
293
/* Release a reference on @node without requesting the prepare list (mod = 0). */
static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
{
        __btrfs_release_delayed_node(node, 0);
}
298
299 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
300                                         struct btrfs_delayed_root *delayed_root)
301 {
302         struct list_head *p;
303         struct btrfs_delayed_node *node = NULL;
304
305         spin_lock(&delayed_root->lock);
306         if (list_empty(&delayed_root->prepare_list))
307                 goto out;
308
309         p = delayed_root->prepare_list.next;
310         list_del_init(p);
311         node = list_entry(p, struct btrfs_delayed_node, p_list);
312         refcount_inc(&node->refs);
313 out:
314         spin_unlock(&delayed_root->lock);
315
316         return node;
317 }
318
/*
 * Release a reference on @node; with mod = 1 a node that still has pending
 * items is put back on the prepare list.
 */
static inline void btrfs_release_prepared_delayed_node(
                                        struct btrfs_delayed_node *node)
{
        __btrfs_release_delayed_node(node, 1);
}
324
325 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
326 {
327         struct btrfs_delayed_item *item;
328         item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
329         if (item) {
330                 item->data_len = data_len;
331                 item->ins_or_del = 0;
332                 item->bytes_reserved = 0;
333                 item->delayed_node = NULL;
334                 refcount_set(&item->refs, 1);
335         }
336         return item;
337 }
338
339 /*
340  * __btrfs_lookup_delayed_item - look up the delayed item by key
341  * @delayed_node: pointer to the delayed node
342  * @key:          the key to look up
343  * @prev:         used to store the prev item if the right item isn't found
344  * @next:         used to store the next item if the right item isn't found
345  *
346  * Note: if we don't find the right item, we will return the prev item and
347  * the next item.
348  */
349 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
350                                 struct rb_root *root,
351                                 struct btrfs_key *key,
352                                 struct btrfs_delayed_item **prev,
353                                 struct btrfs_delayed_item **next)
354 {
355         struct rb_node *node, *prev_node = NULL;
356         struct btrfs_delayed_item *delayed_item = NULL;
357         int ret = 0;
358
359         node = root->rb_node;
360
361         while (node) {
362                 delayed_item = rb_entry(node, struct btrfs_delayed_item,
363                                         rb_node);
364                 prev_node = node;
365                 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
366                 if (ret < 0)
367                         node = node->rb_right;
368                 else if (ret > 0)
369                         node = node->rb_left;
370                 else
371                         return delayed_item;
372         }
373
374         if (prev) {
375                 if (!prev_node)
376                         *prev = NULL;
377                 else if (ret < 0)
378                         *prev = delayed_item;
379                 else if ((node = rb_prev(prev_node)) != NULL) {
380                         *prev = rb_entry(node, struct btrfs_delayed_item,
381                                          rb_node);
382                 } else
383                         *prev = NULL;
384         }
385
386         if (next) {
387                 if (!prev_node)
388                         *next = NULL;
389                 else if (ret > 0)
390                         *next = delayed_item;
391                 else if ((node = rb_next(prev_node)) != NULL) {
392                         *next = rb_entry(node, struct btrfs_delayed_item,
393                                          rb_node);
394                 } else
395                         *next = NULL;
396         }
397         return NULL;
398 }
399
400 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
401                                         struct btrfs_delayed_node *delayed_node,
402                                         struct btrfs_key *key)
403 {
404         return __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
405                                            NULL, NULL);
406 }
407
408 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
409                                     struct btrfs_delayed_item *ins,
410                                     int action)
411 {
412         struct rb_node **p, *node;
413         struct rb_node *parent_node = NULL;
414         struct rb_root *root;
415         struct btrfs_delayed_item *item;
416         int cmp;
417
418         if (action == BTRFS_DELAYED_INSERTION_ITEM)
419                 root = &delayed_node->ins_root;
420         else if (action == BTRFS_DELAYED_DELETION_ITEM)
421                 root = &delayed_node->del_root;
422         else
423                 BUG();
424         p = &root->rb_node;
425         node = &ins->rb_node;
426
427         while (*p) {
428                 parent_node = *p;
429                 item = rb_entry(parent_node, struct btrfs_delayed_item,
430                                  rb_node);
431
432                 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
433                 if (cmp < 0)
434                         p = &(*p)->rb_right;
435                 else if (cmp > 0)
436                         p = &(*p)->rb_left;
437                 else
438                         return -EEXIST;
439         }
440
441         rb_link_node(node, parent_node, p);
442         rb_insert_color(node, root);
443         ins->delayed_node = delayed_node;
444         ins->ins_or_del = action;
445
446         if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
447             action == BTRFS_DELAYED_INSERTION_ITEM &&
448             ins->key.offset >= delayed_node->index_cnt)
449                         delayed_node->index_cnt = ins->key.offset + 1;
450
451         delayed_node->count++;
452         atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
453         return 0;
454 }
455
456 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
457                                               struct btrfs_delayed_item *item)
458 {
459         return __btrfs_add_delayed_item(node, item,
460                                         BTRFS_DELAYED_INSERTION_ITEM);
461 }
462
463 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
464                                              struct btrfs_delayed_item *item)
465 {
466         return __btrfs_add_delayed_item(node, item,
467                                         BTRFS_DELAYED_DELETION_ITEM);
468 }
469
470 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
471 {
472         int seq = atomic_inc_return(&delayed_root->items_seq);
473
474         /*
475          * atomic_dec_return implies a barrier for waitqueue_active
476          */
477         if ((atomic_dec_return(&delayed_root->items) <
478             BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
479             waitqueue_active(&delayed_root->wait))
480                 wake_up(&delayed_root->wait);
481 }
482
483 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
484 {
485         struct rb_root *root;
486         struct btrfs_delayed_root *delayed_root;
487
488         delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
489
490         BUG_ON(!delayed_root);
491         BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
492                delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
493
494         if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
495                 root = &delayed_item->delayed_node->ins_root;
496         else
497                 root = &delayed_item->delayed_node->del_root;
498
499         rb_erase(&delayed_item->rb_node, root);
500         delayed_item->delayed_node->count--;
501
502         finish_one_item(delayed_root);
503 }
504
505 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
506 {
507         if (item) {
508                 __btrfs_remove_delayed_item(item);
509                 if (refcount_dec_and_test(&item->refs))
510                         kfree(item);
511         }
512 }
513
514 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
515                                         struct btrfs_delayed_node *delayed_node)
516 {
517         struct rb_node *p;
518         struct btrfs_delayed_item *item = NULL;
519
520         p = rb_first(&delayed_node->ins_root);
521         if (p)
522                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
523
524         return item;
525 }
526
527 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
528                                         struct btrfs_delayed_node *delayed_node)
529 {
530         struct rb_node *p;
531         struct btrfs_delayed_item *item = NULL;
532
533         p = rb_first(&delayed_node->del_root);
534         if (p)
535                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
536
537         return item;
538 }
539
540 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
541                                                 struct btrfs_delayed_item *item)
542 {
543         struct rb_node *p;
544         struct btrfs_delayed_item *next = NULL;
545
546         p = rb_next(&item->rb_node);
547         if (p)
548                 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
549
550         return next;
551 }
552
553 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
554                                                struct btrfs_fs_info *fs_info,
555                                                struct btrfs_delayed_item *item)
556 {
557         struct btrfs_block_rsv *src_rsv;
558         struct btrfs_block_rsv *dst_rsv;
559         u64 num_bytes;
560         int ret;
561
562         if (!trans->bytes_reserved)
563                 return 0;
564
565         src_rsv = trans->block_rsv;
566         dst_rsv = &fs_info->delayed_block_rsv;
567
568         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
569         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
570         if (!ret) {
571                 trace_btrfs_space_reservation(fs_info, "delayed_item",
572                                               item->key.objectid,
573                                               num_bytes, 1);
574                 item->bytes_reserved = num_bytes;
575         }
576
577         return ret;
578 }
579
580 static void btrfs_delayed_item_release_metadata(struct btrfs_fs_info *fs_info,
581                                                 struct btrfs_delayed_item *item)
582 {
583         struct btrfs_block_rsv *rsv;
584
585         if (!item->bytes_reserved)
586                 return;
587
588         rsv = &fs_info->delayed_block_rsv;
589         trace_btrfs_space_reservation(fs_info, "delayed_item",
590                                       item->key.objectid, item->bytes_reserved,
591                                       0);
592         btrfs_block_rsv_release(fs_info, rsv,
593                                 item->bytes_reserved);
594 }
595
596 static int btrfs_delayed_inode_reserve_metadata(
597                                         struct btrfs_trans_handle *trans,
598                                         struct btrfs_root *root,
599                                         struct btrfs_inode *inode,
600                                         struct btrfs_delayed_node *node)
601 {
602         struct btrfs_fs_info *fs_info = root->fs_info;
603         struct btrfs_block_rsv *src_rsv;
604         struct btrfs_block_rsv *dst_rsv;
605         u64 num_bytes;
606         int ret;
607
608         src_rsv = trans->block_rsv;
609         dst_rsv = &fs_info->delayed_block_rsv;
610
611         num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
612
613         /*
614          * btrfs_dirty_inode will update the inode under btrfs_join_transaction
615          * which doesn't reserve space for speed.  This is a problem since we
616          * still need to reserve space for this update, so try to reserve the
617          * space.
618          *
619          * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
620          * we always reserve enough to update the inode item.
621          */
622         if (!src_rsv || (!trans->bytes_reserved &&
623                          src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
624                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
625                                           BTRFS_RESERVE_NO_FLUSH);
626                 /*
627                  * Since we're under a transaction reserve_metadata_bytes could
628                  * try to commit the transaction which will make it return
629                  * EAGAIN to make us stop the transaction we have, so return
630                  * ENOSPC instead so that btrfs_dirty_inode knows what to do.
631                  */
632                 if (ret == -EAGAIN)
633                         ret = -ENOSPC;
634                 if (!ret) {
635                         node->bytes_reserved = num_bytes;
636                         trace_btrfs_space_reservation(fs_info,
637                                                       "delayed_inode",
638                                                       btrfs_ino(inode),
639                                                       num_bytes, 1);
640                 }
641                 return ret;
642         }
643
644         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
645         if (!ret) {
646                 trace_btrfs_space_reservation(fs_info, "delayed_inode",
647                                               btrfs_ino(inode), num_bytes, 1);
648                 node->bytes_reserved = num_bytes;
649         }
650
651         return ret;
652 }
653
654 static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
655                                                 struct btrfs_delayed_node *node)
656 {
657         struct btrfs_block_rsv *rsv;
658
659         if (!node->bytes_reserved)
660                 return;
661
662         rsv = &fs_info->delayed_block_rsv;
663         trace_btrfs_space_reservation(fs_info, "delayed_inode",
664                                       node->inode_id, node->bytes_reserved, 0);
665         btrfs_block_rsv_release(fs_info, rsv,
666                                 node->bytes_reserved);
667         node->bytes_reserved = 0;
668 }
669
670 /*
671  * This helper will insert some continuous items into the same leaf according
672  * to the free space of the leaf.
673  */
674 static int btrfs_batch_insert_items(struct btrfs_root *root,
675                                     struct btrfs_path *path,
676                                     struct btrfs_delayed_item *item)
677 {
678         struct btrfs_fs_info *fs_info = root->fs_info;
679         struct btrfs_delayed_item *curr, *next;
680         int free_space;
681         int total_data_size = 0, total_size = 0;
682         struct extent_buffer *leaf;
683         char *data_ptr;
684         struct btrfs_key *keys;
685         u32 *data_size;
686         struct list_head head;
687         int slot;
688         int nitems;
689         int i;
690         int ret = 0;
691
692         BUG_ON(!path->nodes[0]);
693
694         leaf = path->nodes[0];
695         free_space = btrfs_leaf_free_space(fs_info, leaf);
696         INIT_LIST_HEAD(&head);
697
698         next = item;
699         nitems = 0;
700
701         /*
702          * count the number of the continuous items that we can insert in batch
703          */
704         while (total_size + next->data_len + sizeof(struct btrfs_item) <=
705                free_space) {
706                 total_data_size += next->data_len;
707                 total_size += next->data_len + sizeof(struct btrfs_item);
708                 list_add_tail(&next->tree_list, &head);
709                 nitems++;
710
711                 curr = next;
712                 next = __btrfs_next_delayed_item(curr);
713                 if (!next)
714                         break;
715
716                 if (!btrfs_is_continuous_delayed_item(curr, next))
717                         break;
718         }
719
720         if (!nitems) {
721                 ret = 0;
722                 goto out;
723         }
724
725         /*
726          * we need allocate some memory space, but it might cause the task
727          * to sleep, so we set all locked nodes in the path to blocking locks
728          * first.
729          */
730         btrfs_set_path_blocking(path);
731
732         keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
733         if (!keys) {
734                 ret = -ENOMEM;
735                 goto out;
736         }
737
738         data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
739         if (!data_size) {
740                 ret = -ENOMEM;
741                 goto error;
742         }
743
744         /* get keys of all the delayed items */
745         i = 0;
746         list_for_each_entry(next, &head, tree_list) {
747                 keys[i] = next->key;
748                 data_size[i] = next->data_len;
749                 i++;
750         }
751
752         /* reset all the locked nodes in the patch to spinning locks. */
753         btrfs_clear_path_blocking(path, NULL, 0);
754
755         /* insert the keys of the items */
756         setup_items_for_insert(root, path, keys, data_size,
757                                total_data_size, total_size, nitems);
758
759         /* insert the dir index items */
760         slot = path->slots[0];
761         list_for_each_entry_safe(curr, next, &head, tree_list) {
762                 data_ptr = btrfs_item_ptr(leaf, slot, char);
763                 write_extent_buffer(leaf, &curr->data,
764                                     (unsigned long)data_ptr,
765                                     curr->data_len);
766                 slot++;
767
768                 btrfs_delayed_item_release_metadata(fs_info, curr);
769
770                 list_del(&curr->tree_list);
771                 btrfs_release_delayed_item(curr);
772         }
773
774 error:
775         kfree(data_size);
776         kfree(keys);
777 out:
778         return ret;
779 }
780
781 /*
782  * This helper can just do simple insertion that needn't extend item for new
783  * data, such as directory name index insertion, inode insertion.
784  */
785 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
786                                      struct btrfs_root *root,
787                                      struct btrfs_path *path,
788                                      struct btrfs_delayed_item *delayed_item)
789 {
790         struct btrfs_fs_info *fs_info = root->fs_info;
791         struct extent_buffer *leaf;
792         char *ptr;
793         int ret;
794
795         ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
796                                       delayed_item->data_len);
797         if (ret < 0 && ret != -EEXIST)
798                 return ret;
799
800         leaf = path->nodes[0];
801
802         ptr = btrfs_item_ptr(leaf, path->slots[0], char);
803
804         write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
805                             delayed_item->data_len);
806         btrfs_mark_buffer_dirty(leaf);
807
808         btrfs_delayed_item_release_metadata(fs_info, delayed_item);
809         return 0;
810 }
811
812 /*
813  * we insert an item first, then if there are some continuous items, we try
814  * to insert those items into the same leaf.
815  */
816 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
817                                       struct btrfs_path *path,
818                                       struct btrfs_root *root,
819                                       struct btrfs_delayed_node *node)
820 {
821         struct btrfs_delayed_item *curr, *prev;
822         int ret = 0;
823
824 do_again:
825         mutex_lock(&node->mutex);
826         curr = __btrfs_first_delayed_insertion_item(node);
827         if (!curr)
828                 goto insert_end;
829
830         ret = btrfs_insert_delayed_item(trans, root, path, curr);
831         if (ret < 0) {
832                 btrfs_release_path(path);
833                 goto insert_end;
834         }
835
836         prev = curr;
837         curr = __btrfs_next_delayed_item(prev);
838         if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
839                 /* insert the continuous items into the same leaf */
840                 path->slots[0]++;
841                 btrfs_batch_insert_items(root, path, curr);
842         }
843         btrfs_release_delayed_item(prev);
844         btrfs_mark_buffer_dirty(path->nodes[0]);
845
846         btrfs_release_path(path);
847         mutex_unlock(&node->mutex);
848         goto do_again;
849
850 insert_end:
851         mutex_unlock(&node->mutex);
852         return ret;
853 }
854
855 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
856                                     struct btrfs_root *root,
857                                     struct btrfs_path *path,
858                                     struct btrfs_delayed_item *item)
859 {
860         struct btrfs_fs_info *fs_info = root->fs_info;
861         struct btrfs_delayed_item *curr, *next;
862         struct extent_buffer *leaf;
863         struct btrfs_key key;
864         struct list_head head;
865         int nitems, i, last_item;
866         int ret = 0;
867
868         BUG_ON(!path->nodes[0]);
869
870         leaf = path->nodes[0];
871
872         i = path->slots[0];
873         last_item = btrfs_header_nritems(leaf) - 1;
874         if (i > last_item)
875                 return -ENOENT; /* FIXME: Is errno suitable? */
876
877         next = item;
878         INIT_LIST_HEAD(&head);
879         btrfs_item_key_to_cpu(leaf, &key, i);
880         nitems = 0;
881         /*
882          * count the number of the dir index items that we can delete in batch
883          */
884         while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
885                 list_add_tail(&next->tree_list, &head);
886                 nitems++;
887
888                 curr = next;
889                 next = __btrfs_next_delayed_item(curr);
890                 if (!next)
891                         break;
892
893                 if (!btrfs_is_continuous_delayed_item(curr, next))
894                         break;
895
896                 i++;
897                 if (i > last_item)
898                         break;
899                 btrfs_item_key_to_cpu(leaf, &key, i);
900         }
901
902         if (!nitems)
903                 return 0;
904
905         ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
906         if (ret)
907                 goto out;
908
909         list_for_each_entry_safe(curr, next, &head, tree_list) {
910                 btrfs_delayed_item_release_metadata(fs_info, curr);
911                 list_del(&curr->tree_list);
912                 btrfs_release_delayed_item(curr);
913         }
914
915 out:
916         return ret;
917 }
918
919 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
920                                       struct btrfs_path *path,
921                                       struct btrfs_root *root,
922                                       struct btrfs_delayed_node *node)
923 {
924         struct btrfs_delayed_item *curr, *prev;
925         int ret = 0;
926
927 do_again:
928         mutex_lock(&node->mutex);
929         curr = __btrfs_first_delayed_deletion_item(node);
930         if (!curr)
931                 goto delete_fail;
932
933         ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
934         if (ret < 0)
935                 goto delete_fail;
936         else if (ret > 0) {
937                 /*
938                  * can't find the item which the node points to, so this node
939                  * is invalid, just drop it.
940                  */
941                 prev = curr;
942                 curr = __btrfs_next_delayed_item(prev);
943                 btrfs_release_delayed_item(prev);
944                 ret = 0;
945                 btrfs_release_path(path);
946                 if (curr) {
947                         mutex_unlock(&node->mutex);
948                         goto do_again;
949                 } else
950                         goto delete_fail;
951         }
952
953         btrfs_batch_delete_items(trans, root, path, curr);
954         btrfs_release_path(path);
955         mutex_unlock(&node->mutex);
956         goto do_again;
957
958 delete_fail:
959         btrfs_release_path(path);
960         mutex_unlock(&node->mutex);
961         return ret;
962 }
963
964 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
965 {
966         struct btrfs_delayed_root *delayed_root;
967
968         if (delayed_node &&
969             test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
970                 BUG_ON(!delayed_node->root);
971                 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
972                 delayed_node->count--;
973
974                 delayed_root = delayed_node->root->fs_info->delayed_root;
975                 finish_one_item(delayed_root);
976         }
977 }
978
979 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
980 {
981         struct btrfs_delayed_root *delayed_root;
982
983         ASSERT(delayed_node->root);
984         clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
985         delayed_node->count--;
986
987         delayed_root = delayed_node->root->fs_info->delayed_root;
988         finish_one_item(delayed_root);
989 }
990
991 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
992                                         struct btrfs_root *root,
993                                         struct btrfs_path *path,
994                                         struct btrfs_delayed_node *node)
995 {
996         struct btrfs_fs_info *fs_info = root->fs_info;
997         struct btrfs_key key;
998         struct btrfs_inode_item *inode_item;
999         struct extent_buffer *leaf;
1000         int mod;
1001         int ret;
1002
1003         key.objectid = node->inode_id;
1004         key.type = BTRFS_INODE_ITEM_KEY;
1005         key.offset = 0;
1006
1007         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1008                 mod = -1;
1009         else
1010                 mod = 1;
1011
1012         ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1013         if (ret > 0) {
1014                 btrfs_release_path(path);
1015                 return -ENOENT;
1016         } else if (ret < 0) {
1017                 return ret;
1018         }
1019
1020         leaf = path->nodes[0];
1021         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1022                                     struct btrfs_inode_item);
1023         write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1024                             sizeof(struct btrfs_inode_item));
1025         btrfs_mark_buffer_dirty(leaf);
1026
1027         if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1028                 goto no_iref;
1029
1030         path->slots[0]++;
1031         if (path->slots[0] >= btrfs_header_nritems(leaf))
1032                 goto search;
1033 again:
1034         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1035         if (key.objectid != node->inode_id)
1036                 goto out;
1037
1038         if (key.type != BTRFS_INODE_REF_KEY &&
1039             key.type != BTRFS_INODE_EXTREF_KEY)
1040                 goto out;
1041
1042         /*
1043          * Delayed iref deletion is for the inode who has only one link,
1044          * so there is only one iref. The case that several irefs are
1045          * in the same item doesn't exist.
1046          */
1047         btrfs_del_item(trans, root, path);
1048 out:
1049         btrfs_release_delayed_iref(node);
1050 no_iref:
1051         btrfs_release_path(path);
1052 err_out:
1053         btrfs_delayed_inode_release_metadata(fs_info, node);
1054         btrfs_release_delayed_inode(node);
1055
1056         return ret;
1057
1058 search:
1059         btrfs_release_path(path);
1060
1061         key.type = BTRFS_INODE_EXTREF_KEY;
1062         key.offset = -1;
1063         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1064         if (ret < 0)
1065                 goto err_out;
1066         ASSERT(ret);
1067
1068         ret = 0;
1069         leaf = path->nodes[0];
1070         path->slots[0]--;
1071         goto again;
1072 }
1073
1074 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1075                                              struct btrfs_root *root,
1076                                              struct btrfs_path *path,
1077                                              struct btrfs_delayed_node *node)
1078 {
1079         int ret;
1080
1081         mutex_lock(&node->mutex);
1082         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1083                 mutex_unlock(&node->mutex);
1084                 return 0;
1085         }
1086
1087         ret = __btrfs_update_delayed_inode(trans, root, path, node);
1088         mutex_unlock(&node->mutex);
1089         return ret;
1090 }
1091
1092 static inline int
1093 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1094                                    struct btrfs_path *path,
1095                                    struct btrfs_delayed_node *node)
1096 {
1097         int ret;
1098
1099         ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1100         if (ret)
1101                 return ret;
1102
1103         ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1104         if (ret)
1105                 return ret;
1106
1107         ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1108         return ret;
1109 }
1110
1111 /*
1112  * Called when committing the transaction.
1113  * Returns 0 on success.
1114  * Returns < 0 on error and returns with an aborted transaction with any
1115  * outstanding delayed items cleaned up.
1116  */
1117 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1118                                      struct btrfs_fs_info *fs_info, int nr)
1119 {
1120         struct btrfs_delayed_root *delayed_root;
1121         struct btrfs_delayed_node *curr_node, *prev_node;
1122         struct btrfs_path *path;
1123         struct btrfs_block_rsv *block_rsv;
1124         int ret = 0;
1125         bool count = (nr > 0);
1126
1127         if (trans->aborted)
1128                 return -EIO;
1129
1130         path = btrfs_alloc_path();
1131         if (!path)
1132                 return -ENOMEM;
1133         path->leave_spinning = 1;
1134
1135         block_rsv = trans->block_rsv;
1136         trans->block_rsv = &fs_info->delayed_block_rsv;
1137
1138         delayed_root = fs_info->delayed_root;
1139
1140         curr_node = btrfs_first_delayed_node(delayed_root);
1141         while (curr_node && (!count || (count && nr--))) {
1142                 ret = __btrfs_commit_inode_delayed_items(trans, path,
1143                                                          curr_node);
1144                 if (ret) {
1145                         btrfs_release_delayed_node(curr_node);
1146                         curr_node = NULL;
1147                         btrfs_abort_transaction(trans, ret);
1148                         break;
1149                 }
1150
1151                 prev_node = curr_node;
1152                 curr_node = btrfs_next_delayed_node(curr_node);
1153                 btrfs_release_delayed_node(prev_node);
1154         }
1155
1156         if (curr_node)
1157                 btrfs_release_delayed_node(curr_node);
1158         btrfs_free_path(path);
1159         trans->block_rsv = block_rsv;
1160
1161         return ret;
1162 }
1163
/*
 * Flush all delayed nodes of the filesystem (no limit on their number).
 *
 * Returns the result of __btrfs_run_delayed_items().
 */
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
                            struct btrfs_fs_info *fs_info)
{
        const int no_limit = -1;

        return __btrfs_run_delayed_items(trans, fs_info, no_limit);
}
1169
/*
 * Flush delayed nodes of the filesystem, passing @nr through as the limit on
 * how many nodes are processed (values <= 0 mean no limit).
 *
 * Returns the result of __btrfs_run_delayed_items().
 */
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
                               struct btrfs_fs_info *fs_info, int nr)
{
        int ret;

        ret = __btrfs_run_delayed_items(trans, fs_info, nr);
        return ret;
}
1175
1176 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1177                                      struct btrfs_inode *inode)
1178 {
1179         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1180         struct btrfs_path *path;
1181         struct btrfs_block_rsv *block_rsv;
1182         int ret;
1183
1184         if (!delayed_node)
1185                 return 0;
1186
1187         mutex_lock(&delayed_node->mutex);
1188         if (!delayed_node->count) {
1189                 mutex_unlock(&delayed_node->mutex);
1190                 btrfs_release_delayed_node(delayed_node);
1191                 return 0;
1192         }
1193         mutex_unlock(&delayed_node->mutex);
1194
1195         path = btrfs_alloc_path();
1196         if (!path) {
1197                 btrfs_release_delayed_node(delayed_node);
1198                 return -ENOMEM;
1199         }
1200         path->leave_spinning = 1;
1201
1202         block_rsv = trans->block_rsv;
1203         trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1204
1205         ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1206
1207         btrfs_release_delayed_node(delayed_node);
1208         btrfs_free_path(path);
1209         trans->block_rsv = block_rsv;
1210
1211         return ret;
1212 }
1213
1214 int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
1215 {
1216         struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
1217         struct btrfs_trans_handle *trans;
1218         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1219         struct btrfs_path *path;
1220         struct btrfs_block_rsv *block_rsv;
1221         int ret;
1222
1223         if (!delayed_node)
1224                 return 0;
1225
1226         mutex_lock(&delayed_node->mutex);
1227         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1228                 mutex_unlock(&delayed_node->mutex);
1229                 btrfs_release_delayed_node(delayed_node);
1230                 return 0;
1231         }
1232         mutex_unlock(&delayed_node->mutex);
1233
1234         trans = btrfs_join_transaction(delayed_node->root);
1235         if (IS_ERR(trans)) {
1236                 ret = PTR_ERR(trans);
1237                 goto out;
1238         }
1239
1240         path = btrfs_alloc_path();
1241         if (!path) {
1242                 ret = -ENOMEM;
1243                 goto trans_out;
1244         }
1245         path->leave_spinning = 1;
1246
1247         block_rsv = trans->block_rsv;
1248         trans->block_rsv = &fs_info->delayed_block_rsv;
1249
1250         mutex_lock(&delayed_node->mutex);
1251         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1252                 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1253                                                    path, delayed_node);
1254         else
1255                 ret = 0;
1256         mutex_unlock(&delayed_node->mutex);
1257
1258         btrfs_free_path(path);
1259         trans->block_rsv = block_rsv;
1260 trans_out:
1261         btrfs_end_transaction(trans);
1262         btrfs_btree_balance_dirty(fs_info);
1263 out:
1264         btrfs_release_delayed_node(delayed_node);
1265
1266         return ret;
1267 }
1268
1269 void btrfs_remove_delayed_node(struct btrfs_inode *inode)
1270 {
1271         struct btrfs_delayed_node *delayed_node;
1272
1273         delayed_node = READ_ONCE(inode->delayed_node);
1274         if (!delayed_node)
1275                 return;
1276
1277         inode->delayed_node = NULL;
1278         btrfs_release_delayed_node(delayed_node);
1279 }
1280
1281 struct btrfs_async_delayed_work {
1282         struct btrfs_delayed_root *delayed_root;
1283         int nr;
1284         struct btrfs_work work;
1285 };
1286
1287 static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1288 {
1289         struct btrfs_async_delayed_work *async_work;
1290         struct btrfs_delayed_root *delayed_root;
1291         struct btrfs_trans_handle *trans;
1292         struct btrfs_path *path;
1293         struct btrfs_delayed_node *delayed_node = NULL;
1294         struct btrfs_root *root;
1295         struct btrfs_block_rsv *block_rsv;
1296         int total_done = 0;
1297
1298         async_work = container_of(work, struct btrfs_async_delayed_work, work);
1299         delayed_root = async_work->delayed_root;
1300
1301         path = btrfs_alloc_path();
1302         if (!path)
1303                 goto out;
1304
1305         do {
1306                 if (atomic_read(&delayed_root->items) <
1307                     BTRFS_DELAYED_BACKGROUND / 2)
1308                         break;
1309
1310                 delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1311                 if (!delayed_node)
1312                         break;
1313
1314                 path->leave_spinning = 1;
1315                 root = delayed_node->root;
1316
1317                 trans = btrfs_join_transaction(root);
1318                 if (IS_ERR(trans)) {
1319                         btrfs_release_path(path);
1320                         btrfs_release_prepared_delayed_node(delayed_node);
1321                         total_done++;
1322                         continue;
1323                 }
1324
1325                 block_rsv = trans->block_rsv;
1326                 trans->block_rsv = &root->fs_info->delayed_block_rsv;
1327
1328                 __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1329
1330                 trans->block_rsv = block_rsv;
1331                 btrfs_end_transaction(trans);
1332                 btrfs_btree_balance_dirty_nodelay(root->fs_info);
1333
1334                 btrfs_release_path(path);
1335                 btrfs_release_prepared_delayed_node(delayed_node);
1336                 total_done++;
1337
1338         } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
1339                  || total_done < async_work->nr);
1340
1341         btrfs_free_path(path);
1342 out:
1343         wake_up(&delayed_root->wait);
1344         kfree(async_work);
1345 }
1346
1347
1348 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1349                                      struct btrfs_fs_info *fs_info, int nr)
1350 {
1351         struct btrfs_async_delayed_work *async_work;
1352
1353         async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1354         if (!async_work)
1355                 return -ENOMEM;
1356
1357         async_work->delayed_root = delayed_root;
1358         btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1359                         btrfs_async_run_delayed_root, NULL, NULL);
1360         async_work->nr = nr;
1361
1362         btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1363         return 0;
1364 }
1365
1366 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
1367 {
1368         WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
1369 }
1370
1371 static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1372 {
1373         int val = atomic_read(&delayed_root->items_seq);
1374
1375         if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1376                 return 1;
1377
1378         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1379                 return 1;
1380
1381         return 0;
1382 }
1383
1384 void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
1385 {
1386         struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
1387
1388         if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
1389                 btrfs_workqueue_normal_congested(fs_info->delayed_workers))
1390                 return;
1391
1392         if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1393                 int seq;
1394                 int ret;
1395
1396                 seq = atomic_read(&delayed_root->items_seq);
1397
1398                 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1399                 if (ret)
1400                         return;
1401
1402                 wait_event_interruptible(delayed_root->wait,
1403                                          could_end_wait(delayed_root, seq));
1404                 return;
1405         }
1406
1407         btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1408 }
1409
1410 /* Will return 0 or -ENOMEM */
1411 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1412                                    struct btrfs_fs_info *fs_info,
1413                                    const char *name, int name_len,
1414                                    struct btrfs_inode *dir,
1415                                    struct btrfs_disk_key *disk_key, u8 type,
1416                                    u64 index)
1417 {
1418         struct btrfs_delayed_node *delayed_node;
1419         struct btrfs_delayed_item *delayed_item;
1420         struct btrfs_dir_item *dir_item;
1421         int ret;
1422
1423         delayed_node = btrfs_get_or_create_delayed_node(dir);
1424         if (IS_ERR(delayed_node))
1425                 return PTR_ERR(delayed_node);
1426
1427         delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1428         if (!delayed_item) {
1429                 ret = -ENOMEM;
1430                 goto release_node;
1431         }
1432
1433         delayed_item->key.objectid = btrfs_ino(dir);
1434         delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
1435         delayed_item->key.offset = index;
1436
1437         dir_item = (struct btrfs_dir_item *)delayed_item->data;
1438         dir_item->location = *disk_key;
1439         btrfs_set_stack_dir_transid(dir_item, trans->transid);
1440         btrfs_set_stack_dir_data_len(dir_item, 0);
1441         btrfs_set_stack_dir_name_len(dir_item, name_len);
1442         btrfs_set_stack_dir_type(dir_item, type);
1443         memcpy((char *)(dir_item + 1), name, name_len);
1444
1445         ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, delayed_item);
1446         /*
1447          * we have reserved enough space when we start a new transaction,
1448          * so reserving metadata failure is impossible
1449          */
1450         BUG_ON(ret);
1451
1452
1453         mutex_lock(&delayed_node->mutex);
1454         ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1455         if (unlikely(ret)) {
1456                 btrfs_err(fs_info,
1457                           "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1458                           name_len, name, delayed_node->root->objectid,
1459                           delayed_node->inode_id, ret);
1460                 BUG();
1461         }
1462         mutex_unlock(&delayed_node->mutex);
1463
1464 release_node:
1465         btrfs_release_delayed_node(delayed_node);
1466         return ret;
1467 }
1468
1469 static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
1470                                                struct btrfs_delayed_node *node,
1471                                                struct btrfs_key *key)
1472 {
1473         struct btrfs_delayed_item *item;
1474
1475         mutex_lock(&node->mutex);
1476         item = __btrfs_lookup_delayed_insertion_item(node, key);
1477         if (!item) {
1478                 mutex_unlock(&node->mutex);
1479                 return 1;
1480         }
1481
1482         btrfs_delayed_item_release_metadata(fs_info, item);
1483         btrfs_release_delayed_item(item);
1484         mutex_unlock(&node->mutex);
1485         return 0;
1486 }
1487
1488 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1489                                    struct btrfs_fs_info *fs_info,
1490                                    struct btrfs_inode *dir, u64 index)
1491 {
1492         struct btrfs_delayed_node *node;
1493         struct btrfs_delayed_item *item;
1494         struct btrfs_key item_key;
1495         int ret;
1496
1497         node = btrfs_get_or_create_delayed_node(dir);
1498         if (IS_ERR(node))
1499                 return PTR_ERR(node);
1500
1501         item_key.objectid = btrfs_ino(dir);
1502         item_key.type = BTRFS_DIR_INDEX_KEY;
1503         item_key.offset = index;
1504
1505         ret = btrfs_delete_delayed_insertion_item(fs_info, node, &item_key);
1506         if (!ret)
1507                 goto end;
1508
1509         item = btrfs_alloc_delayed_item(0);
1510         if (!item) {
1511                 ret = -ENOMEM;
1512                 goto end;
1513         }
1514
1515         item->key = item_key;
1516
1517         ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, item);
1518         /*
1519          * we have reserved enough space when we start a new transaction,
1520          * so reserving metadata failure is impossible.
1521          */
1522         BUG_ON(ret);
1523
1524         mutex_lock(&node->mutex);
1525         ret = __btrfs_add_delayed_deletion_item(node, item);
1526         if (unlikely(ret)) {
1527                 btrfs_err(fs_info,
1528                           "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1529                           index, node->root->objectid, node->inode_id, ret);
1530                 BUG();
1531         }
1532         mutex_unlock(&node->mutex);
1533 end:
1534         btrfs_release_delayed_node(node);
1535         return ret;
1536 }
1537
1538 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
1539 {
1540         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1541
1542         if (!delayed_node)
1543                 return -ENOENT;
1544
1545         /*
1546          * Since we have held i_mutex of this directory, it is impossible that
1547          * a new directory index is added into the delayed node and index_cnt
1548          * is updated now. So we needn't lock the delayed node.
1549          */
1550         if (!delayed_node->index_cnt) {
1551                 btrfs_release_delayed_node(delayed_node);
1552                 return -EINVAL;
1553         }
1554
1555         inode->index_cnt = delayed_node->index_cnt;
1556         btrfs_release_delayed_node(delayed_node);
1557         return 0;
1558 }
1559
1560 bool btrfs_readdir_get_delayed_items(struct inode *inode,
1561                                      struct list_head *ins_list,
1562                                      struct list_head *del_list)
1563 {
1564         struct btrfs_delayed_node *delayed_node;
1565         struct btrfs_delayed_item *item;
1566
1567         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1568         if (!delayed_node)
1569                 return false;
1570
1571         /*
1572          * We can only do one readdir with delayed items at a time because of
1573          * item->readdir_list.
1574          */
1575         inode_unlock_shared(inode);
1576         inode_lock(inode);
1577
1578         mutex_lock(&delayed_node->mutex);
1579         item = __btrfs_first_delayed_insertion_item(delayed_node);
1580         while (item) {
1581                 refcount_inc(&item->refs);
1582                 list_add_tail(&item->readdir_list, ins_list);
1583                 item = __btrfs_next_delayed_item(item);
1584         }
1585
1586         item = __btrfs_first_delayed_deletion_item(delayed_node);
1587         while (item) {
1588                 refcount_inc(&item->refs);
1589                 list_add_tail(&item->readdir_list, del_list);
1590                 item = __btrfs_next_delayed_item(item);
1591         }
1592         mutex_unlock(&delayed_node->mutex);
1593         /*
1594          * This delayed node is still cached in the btrfs inode, so refs
1595          * must be > 1 now, and we needn't check it is going to be freed
1596          * or not.
1597          *
1598          * Besides that, this function is used to read dir, we do not
1599          * insert/delete delayed items in this period. So we also needn't
1600          * requeue or dequeue this delayed node.
1601          */
1602         refcount_dec(&delayed_node->refs);
1603
1604         return true;
1605 }
1606
1607 void btrfs_readdir_put_delayed_items(struct inode *inode,
1608                                      struct list_head *ins_list,
1609                                      struct list_head *del_list)
1610 {
1611         struct btrfs_delayed_item *curr, *next;
1612
1613         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1614                 list_del(&curr->readdir_list);
1615                 if (refcount_dec_and_test(&curr->refs))
1616                         kfree(curr);
1617         }
1618
1619         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1620                 list_del(&curr->readdir_list);
1621                 if (refcount_dec_and_test(&curr->refs))
1622                         kfree(curr);
1623         }
1624
1625         /*
1626          * The VFS is going to do up_read(), so we need to downgrade back to a
1627          * read lock.
1628          */
1629         downgrade_write(&inode->i_rwsem);
1630 }
1631
1632 int btrfs_should_delete_dir_index(struct list_head *del_list,
1633                                   u64 index)
1634 {
1635         struct btrfs_delayed_item *curr, *next;
1636         int ret;
1637
1638         if (list_empty(del_list))
1639                 return 0;
1640
1641         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1642                 if (curr->key.offset > index)
1643                         break;
1644
1645                 list_del(&curr->readdir_list);
1646                 ret = (curr->key.offset == index);
1647
1648                 if (refcount_dec_and_test(&curr->refs))
1649                         kfree(curr);
1650
1651                 if (ret)
1652                         return 1;
1653                 else
1654                         continue;
1655         }
1656         return 0;
1657 }
1658
1659 /*
1660  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1661  *
1662  */
1663 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1664                                     struct list_head *ins_list)
1665 {
1666         struct btrfs_dir_item *di;
1667         struct btrfs_delayed_item *curr, *next;
1668         struct btrfs_key location;
1669         char *name;
1670         int name_len;
1671         int over = 0;
1672         unsigned char d_type;
1673
1674         if (list_empty(ins_list))
1675                 return 0;
1676
1677         /*
1678          * Changing the data of the delayed item is impossible. So
1679          * we needn't lock them. And we have held i_mutex of the
1680          * directory, nobody can delete any directory indexes now.
1681          */
1682         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1683                 list_del(&curr->readdir_list);
1684
1685                 if (curr->key.offset < ctx->pos) {
1686                         if (refcount_dec_and_test(&curr->refs))
1687                                 kfree(curr);
1688                         continue;
1689                 }
1690
1691                 ctx->pos = curr->key.offset;
1692
1693                 di = (struct btrfs_dir_item *)curr->data;
1694                 name = (char *)(di + 1);
1695                 name_len = btrfs_stack_dir_name_len(di);
1696
1697                 d_type = btrfs_filetype_table[di->type];
1698                 btrfs_disk_key_to_cpu(&location, &di->location);
1699
1700                 over = !dir_emit(ctx, name, name_len,
1701                                location.objectid, d_type);
1702
1703                 if (refcount_dec_and_test(&curr->refs))
1704                         kfree(curr);
1705
1706                 if (over)
1707                         return 1;
1708                 ctx->pos++;
1709         }
1710         return 0;
1711 }
1712
1713 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1714                                   struct btrfs_inode_item *inode_item,
1715                                   struct inode *inode)
1716 {
1717         btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1718         btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1719         btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1720         btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1721         btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1722         btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1723         btrfs_set_stack_inode_generation(inode_item,
1724                                          BTRFS_I(inode)->generation);
1725         btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
1726         btrfs_set_stack_inode_transid(inode_item, trans->transid);
1727         btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1728         btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1729         btrfs_set_stack_inode_block_group(inode_item, 0);
1730
1731         btrfs_set_stack_timespec_sec(&inode_item->atime,
1732                                      inode->i_atime.tv_sec);
1733         btrfs_set_stack_timespec_nsec(&inode_item->atime,
1734                                       inode->i_atime.tv_nsec);
1735
1736         btrfs_set_stack_timespec_sec(&inode_item->mtime,
1737                                      inode->i_mtime.tv_sec);
1738         btrfs_set_stack_timespec_nsec(&inode_item->mtime,
1739                                       inode->i_mtime.tv_nsec);
1740
1741         btrfs_set_stack_timespec_sec(&inode_item->ctime,
1742                                      inode->i_ctime.tv_sec);
1743         btrfs_set_stack_timespec_nsec(&inode_item->ctime,
1744                                       inode->i_ctime.tv_nsec);
1745
1746         btrfs_set_stack_timespec_sec(&inode_item->otime,
1747                                      BTRFS_I(inode)->i_otime.tv_sec);
1748         btrfs_set_stack_timespec_nsec(&inode_item->otime,
1749                                      BTRFS_I(inode)->i_otime.tv_nsec);
1750 }
1751
1752 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1753 {
1754         struct btrfs_delayed_node *delayed_node;
1755         struct btrfs_inode_item *inode_item;
1756
1757         delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1758         if (!delayed_node)
1759                 return -ENOENT;
1760
1761         mutex_lock(&delayed_node->mutex);
1762         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1763                 mutex_unlock(&delayed_node->mutex);
1764                 btrfs_release_delayed_node(delayed_node);
1765                 return -ENOENT;
1766         }
1767
1768         inode_item = &delayed_node->inode_item;
1769
1770         i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1771         i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1772         btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
1773         inode->i_mode = btrfs_stack_inode_mode(inode_item);
1774         set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1775         inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1776         BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1777         BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1778
1779         inode->i_version = btrfs_stack_inode_sequence(inode_item);
1780         inode->i_rdev = 0;
1781         *rdev = btrfs_stack_inode_rdev(inode_item);
1782         BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1783
1784         inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
1785         inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
1786
1787         inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
1788         inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
1789
1790         inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
1791         inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
1792
1793         BTRFS_I(inode)->i_otime.tv_sec =
1794                 btrfs_stack_timespec_sec(&inode_item->otime);
1795         BTRFS_I(inode)->i_otime.tv_nsec =
1796                 btrfs_stack_timespec_nsec(&inode_item->otime);
1797
1798         inode->i_generation = BTRFS_I(inode)->generation;
1799         BTRFS_I(inode)->index_cnt = (u64)-1;
1800
1801         mutex_unlock(&delayed_node->mutex);
1802         btrfs_release_delayed_node(delayed_node);
1803         return 0;
1804 }
1805
1806 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1807                                struct btrfs_root *root, struct inode *inode)
1808 {
1809         struct btrfs_delayed_node *delayed_node;
1810         int ret = 0;
1811
1812         delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
1813         if (IS_ERR(delayed_node))
1814                 return PTR_ERR(delayed_node);
1815
1816         mutex_lock(&delayed_node->mutex);
1817         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1818                 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1819                 goto release_node;
1820         }
1821
1822         ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
1823                                                    delayed_node);
1824         if (ret)
1825                 goto release_node;
1826
1827         fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1828         set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1829         delayed_node->count++;
1830         atomic_inc(&root->fs_info->delayed_root->items);
1831 release_node:
1832         mutex_unlock(&delayed_node->mutex);
1833         btrfs_release_delayed_node(delayed_node);
1834         return ret;
1835 }
1836
1837 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
1838 {
1839         struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
1840         struct btrfs_delayed_node *delayed_node;
1841
1842         /*
1843          * we don't do delayed inode updates during log recovery because it
1844          * leads to enospc problems.  This means we also can't do
1845          * delayed inode refs
1846          */
1847         if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
1848                 return -EAGAIN;
1849
1850         delayed_node = btrfs_get_or_create_delayed_node(inode);
1851         if (IS_ERR(delayed_node))
1852                 return PTR_ERR(delayed_node);
1853
1854         /*
1855          * We don't reserve space for inode ref deletion is because:
1856          * - We ONLY do async inode ref deletion for the inode who has only
1857          *   one link(i_nlink == 1), it means there is only one inode ref.
1858          *   And in most case, the inode ref and the inode item are in the
1859          *   same leaf, and we will deal with them at the same time.
1860          *   Since we are sure we will reserve the space for the inode item,
1861          *   it is unnecessary to reserve space for inode ref deletion.
1862          * - If the inode ref and the inode item are not in the same leaf,
1863          *   We also needn't worry about enospc problem, because we reserve
1864          *   much more space for the inode update than it needs.
1865          * - At the worst, we can steal some space from the global reservation.
1866          *   It is very rare.
1867          */
1868         mutex_lock(&delayed_node->mutex);
1869         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1870                 goto release_node;
1871
1872         set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1873         delayed_node->count++;
1874         atomic_inc(&fs_info->delayed_root->items);
1875 release_node:
1876         mutex_unlock(&delayed_node->mutex);
1877         btrfs_release_delayed_node(delayed_node);
1878         return 0;
1879 }
1880
1881 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1882 {
1883         struct btrfs_root *root = delayed_node->root;
1884         struct btrfs_fs_info *fs_info = root->fs_info;
1885         struct btrfs_delayed_item *curr_item, *prev_item;
1886
1887         mutex_lock(&delayed_node->mutex);
1888         curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1889         while (curr_item) {
1890                 btrfs_delayed_item_release_metadata(fs_info, curr_item);
1891                 prev_item = curr_item;
1892                 curr_item = __btrfs_next_delayed_item(prev_item);
1893                 btrfs_release_delayed_item(prev_item);
1894         }
1895
1896         curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1897         while (curr_item) {
1898                 btrfs_delayed_item_release_metadata(fs_info, curr_item);
1899                 prev_item = curr_item;
1900                 curr_item = __btrfs_next_delayed_item(prev_item);
1901                 btrfs_release_delayed_item(prev_item);
1902         }
1903
1904         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1905                 btrfs_release_delayed_iref(delayed_node);
1906
1907         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1908                 btrfs_delayed_inode_release_metadata(fs_info, delayed_node);
1909                 btrfs_release_delayed_inode(delayed_node);
1910         }
1911         mutex_unlock(&delayed_node->mutex);
1912 }
1913
/*
 * Discard all pending delayed work of @inode.
 *
 * Looks up the inode's delayed node; if there is one, kills its pending
 * items via __btrfs_kill_delayed_node() and drops the reference taken by
 * the lookup.  Does nothing when the inode has no delayed node.
 */
void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
{
        struct btrfs_delayed_node *node = btrfs_get_delayed_node(inode);

        if (node) {
                __btrfs_kill_delayed_node(node);
                btrfs_release_delayed_node(node);
        }
}
1925
1926 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1927 {
1928         u64 inode_id = 0;
1929         struct btrfs_delayed_node *delayed_nodes[8];
1930         int i, n;
1931
1932         while (1) {
1933                 spin_lock(&root->inode_lock);
1934                 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1935                                            (void **)delayed_nodes, inode_id,
1936                                            ARRAY_SIZE(delayed_nodes));
1937                 if (!n) {
1938                         spin_unlock(&root->inode_lock);
1939                         break;
1940                 }
1941
1942                 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1943
1944                 for (i = 0; i < n; i++)
1945                         refcount_inc(&delayed_nodes[i]->refs);
1946                 spin_unlock(&root->inode_lock);
1947
1948                 for (i = 0; i < n; i++) {
1949                         __btrfs_kill_delayed_node(delayed_nodes[i]);
1950                         btrfs_release_delayed_node(delayed_nodes[i]);
1951                 }
1952         }
1953 }
1954
1955 void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
1956 {
1957         struct btrfs_delayed_node *curr_node, *prev_node;
1958
1959         curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
1960         while (curr_node) {
1961                 __btrfs_kill_delayed_node(curr_node);
1962
1963                 prev_node = curr_node;
1964                 curr_node = btrfs_next_delayed_node(curr_node);
1965                 btrfs_release_delayed_node(prev_node);
1966         }
1967 }
1968