Btrfs: rework qgroup accounting
[linux-2.6-block.git] / fs / btrfs / qgroup.c
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include <linux/workqueue.h>
26 #include <linux/btrfs.h>
27
28 #include "ctree.h"
29 #include "transaction.h"
30 #include "disk-io.h"
31 #include "locking.h"
32 #include "ulist.h"
33 #include "backref.h"
34 #include "extent_io.h"
35 #include "qgroup.h"
36
37 /* TODO XXX FIXME
38  *  - subvol delete -> delete when ref goes to 0? delete limits also?
39  *  - reorganize keys
40  *  - compressed
41  *  - sync
42  *  - copy also limits on subvol creation
43  *  - limit
44  *  - caches fuer ulists
45  *  - performance benchmarks
46  *  - check all ioctl parameters
47  */
48
49 /*
50  * one struct for each qgroup, organized in fs_info->qgroup_tree.
51  */
52 struct btrfs_qgroup {
53         u64 qgroupid;
54
55         /*
56          * state
57          */
58         u64 rfer;       /* referenced */
59         u64 rfer_cmpr;  /* referenced compressed */
60         u64 excl;       /* exclusive */
61         u64 excl_cmpr;  /* exclusive compressed */
62
63         /*
64          * limits
65          */
66         u64 lim_flags;  /* which limits are set */
67         u64 max_rfer;
68         u64 max_excl;
69         u64 rsv_rfer;
70         u64 rsv_excl;
71
72         /*
73          * reservation tracking
74          */
75         u64 reserved;
76
77         /*
78          * lists
79          */
80         struct list_head groups;  /* groups this group is member of */
81         struct list_head members; /* groups that are members of this group */
82         struct list_head dirty;   /* dirty groups */
83         struct rb_node node;      /* tree of qgroups */
84
85         /*
86          * temp variables for accounting operations
87          */
88         u64 old_refcnt;
89         u64 new_refcnt;
90 };
91
92 /*
93  * glue structure to represent the relations between qgroups.
94  */
95 struct btrfs_qgroup_list {
96         struct list_head next_group;
97         struct list_head next_member;
98         struct btrfs_qgroup *group;
99         struct btrfs_qgroup *member;
100 };
101
102 #define ptr_to_u64(x) ((u64)(uintptr_t)x)
103 #define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x)
104
105 static int
106 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
107                    int init_flags);
108 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
109
110 /* must be called with qgroup_ioctl_lock held */
111 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
112                                            u64 qgroupid)
113 {
114         struct rb_node *n = fs_info->qgroup_tree.rb_node;
115         struct btrfs_qgroup *qgroup;
116
117         while (n) {
118                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
119                 if (qgroup->qgroupid < qgroupid)
120                         n = n->rb_left;
121                 else if (qgroup->qgroupid > qgroupid)
122                         n = n->rb_right;
123                 else
124                         return qgroup;
125         }
126         return NULL;
127 }
128
129 /* must be called with qgroup_lock held */
130 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
131                                           u64 qgroupid)
132 {
133         struct rb_node **p = &fs_info->qgroup_tree.rb_node;
134         struct rb_node *parent = NULL;
135         struct btrfs_qgroup *qgroup;
136
137         while (*p) {
138                 parent = *p;
139                 qgroup = rb_entry(parent, struct btrfs_qgroup, node);
140
141                 if (qgroup->qgroupid < qgroupid)
142                         p = &(*p)->rb_left;
143                 else if (qgroup->qgroupid > qgroupid)
144                         p = &(*p)->rb_right;
145                 else
146                         return qgroup;
147         }
148
149         qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
150         if (!qgroup)
151                 return ERR_PTR(-ENOMEM);
152
153         qgroup->qgroupid = qgroupid;
154         INIT_LIST_HEAD(&qgroup->groups);
155         INIT_LIST_HEAD(&qgroup->members);
156         INIT_LIST_HEAD(&qgroup->dirty);
157
158         rb_link_node(&qgroup->node, parent, p);
159         rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
160
161         return qgroup;
162 }
163
164 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
165 {
166         struct btrfs_qgroup_list *list;
167
168         list_del(&qgroup->dirty);
169         while (!list_empty(&qgroup->groups)) {
170                 list = list_first_entry(&qgroup->groups,
171                                         struct btrfs_qgroup_list, next_group);
172                 list_del(&list->next_group);
173                 list_del(&list->next_member);
174                 kfree(list);
175         }
176
177         while (!list_empty(&qgroup->members)) {
178                 list = list_first_entry(&qgroup->members,
179                                         struct btrfs_qgroup_list, next_member);
180                 list_del(&list->next_group);
181                 list_del(&list->next_member);
182                 kfree(list);
183         }
184         kfree(qgroup);
185 }
186
187 /* must be called with qgroup_lock held */
188 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
189 {
190         struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
191
192         if (!qgroup)
193                 return -ENOENT;
194
195         rb_erase(&qgroup->node, &fs_info->qgroup_tree);
196         __del_qgroup_rb(qgroup);
197         return 0;
198 }
199
200 /* must be called with qgroup_lock held */
201 static int add_relation_rb(struct btrfs_fs_info *fs_info,
202                            u64 memberid, u64 parentid)
203 {
204         struct btrfs_qgroup *member;
205         struct btrfs_qgroup *parent;
206         struct btrfs_qgroup_list *list;
207
208         member = find_qgroup_rb(fs_info, memberid);
209         parent = find_qgroup_rb(fs_info, parentid);
210         if (!member || !parent)
211                 return -ENOENT;
212
213         list = kzalloc(sizeof(*list), GFP_ATOMIC);
214         if (!list)
215                 return -ENOMEM;
216
217         list->group = parent;
218         list->member = member;
219         list_add_tail(&list->next_group, &member->groups);
220         list_add_tail(&list->next_member, &parent->members);
221
222         return 0;
223 }
224
225 /* must be called with qgroup_lock held */
226 static int del_relation_rb(struct btrfs_fs_info *fs_info,
227                            u64 memberid, u64 parentid)
228 {
229         struct btrfs_qgroup *member;
230         struct btrfs_qgroup *parent;
231         struct btrfs_qgroup_list *list;
232
233         member = find_qgroup_rb(fs_info, memberid);
234         parent = find_qgroup_rb(fs_info, parentid);
235         if (!member || !parent)
236                 return -ENOENT;
237
238         list_for_each_entry(list, &member->groups, next_group) {
239                 if (list->group == parent) {
240                         list_del(&list->next_group);
241                         list_del(&list->next_member);
242                         kfree(list);
243                         return 0;
244                 }
245         }
246         return -ENOENT;
247 }
248
249 /*
250  * The full config is read in one go, only called from open_ctree()
251  * It doesn't use any locking, as at this point we're still single-threaded
252  */
253 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
254 {
255         struct btrfs_key key;
256         struct btrfs_key found_key;
257         struct btrfs_root *quota_root = fs_info->quota_root;
258         struct btrfs_path *path = NULL;
259         struct extent_buffer *l;
260         int slot;
261         int ret = 0;
262         u64 flags = 0;
263         u64 rescan_progress = 0;
264
265         if (!fs_info->quota_enabled)
266                 return 0;
267
268         fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
269         if (!fs_info->qgroup_ulist) {
270                 ret = -ENOMEM;
271                 goto out;
272         }
273
274         path = btrfs_alloc_path();
275         if (!path) {
276                 ret = -ENOMEM;
277                 goto out;
278         }
279
280         /* default this to quota off, in case no status key is found */
281         fs_info->qgroup_flags = 0;
282
283         /*
284          * pass 1: read status, all qgroup infos and limits
285          */
286         key.objectid = 0;
287         key.type = 0;
288         key.offset = 0;
289         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
290         if (ret)
291                 goto out;
292
293         while (1) {
294                 struct btrfs_qgroup *qgroup;
295
296                 slot = path->slots[0];
297                 l = path->nodes[0];
298                 btrfs_item_key_to_cpu(l, &found_key, slot);
299
300                 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
301                         struct btrfs_qgroup_status_item *ptr;
302
303                         ptr = btrfs_item_ptr(l, slot,
304                                              struct btrfs_qgroup_status_item);
305
306                         if (btrfs_qgroup_status_version(l, ptr) !=
307                             BTRFS_QGROUP_STATUS_VERSION) {
308                                 btrfs_err(fs_info,
309                                  "old qgroup version, quota disabled");
310                                 goto out;
311                         }
312                         if (btrfs_qgroup_status_generation(l, ptr) !=
313                             fs_info->generation) {
314                                 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
315                                 btrfs_err(fs_info,
316                                         "qgroup generation mismatch, "
317                                         "marked as inconsistent");
318                         }
319                         fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
320                                                                           ptr);
321                         rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
322                         goto next1;
323                 }
324
325                 if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
326                     found_key.type != BTRFS_QGROUP_LIMIT_KEY)
327                         goto next1;
328
329                 qgroup = find_qgroup_rb(fs_info, found_key.offset);
330                 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
331                     (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
332                         btrfs_err(fs_info, "inconsitent qgroup config");
333                         flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
334                 }
335                 if (!qgroup) {
336                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
337                         if (IS_ERR(qgroup)) {
338                                 ret = PTR_ERR(qgroup);
339                                 goto out;
340                         }
341                 }
342                 switch (found_key.type) {
343                 case BTRFS_QGROUP_INFO_KEY: {
344                         struct btrfs_qgroup_info_item *ptr;
345
346                         ptr = btrfs_item_ptr(l, slot,
347                                              struct btrfs_qgroup_info_item);
348                         qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
349                         qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
350                         qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
351                         qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
352                         /* generation currently unused */
353                         break;
354                 }
355                 case BTRFS_QGROUP_LIMIT_KEY: {
356                         struct btrfs_qgroup_limit_item *ptr;
357
358                         ptr = btrfs_item_ptr(l, slot,
359                                              struct btrfs_qgroup_limit_item);
360                         qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
361                         qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
362                         qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
363                         qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
364                         qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
365                         break;
366                 }
367                 }
368 next1:
369                 ret = btrfs_next_item(quota_root, path);
370                 if (ret < 0)
371                         goto out;
372                 if (ret)
373                         break;
374         }
375         btrfs_release_path(path);
376
377         /*
378          * pass 2: read all qgroup relations
379          */
380         key.objectid = 0;
381         key.type = BTRFS_QGROUP_RELATION_KEY;
382         key.offset = 0;
383         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
384         if (ret)
385                 goto out;
386         while (1) {
387                 slot = path->slots[0];
388                 l = path->nodes[0];
389                 btrfs_item_key_to_cpu(l, &found_key, slot);
390
391                 if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
392                         goto next2;
393
394                 if (found_key.objectid > found_key.offset) {
395                         /* parent <- member, not needed to build config */
396                         /* FIXME should we omit the key completely? */
397                         goto next2;
398                 }
399
400                 ret = add_relation_rb(fs_info, found_key.objectid,
401                                       found_key.offset);
402                 if (ret == -ENOENT) {
403                         btrfs_warn(fs_info,
404                                 "orphan qgroup relation 0x%llx->0x%llx",
405                                 found_key.objectid, found_key.offset);
406                         ret = 0;        /* ignore the error */
407                 }
408                 if (ret)
409                         goto out;
410 next2:
411                 ret = btrfs_next_item(quota_root, path);
412                 if (ret < 0)
413                         goto out;
414                 if (ret)
415                         break;
416         }
417 out:
418         fs_info->qgroup_flags |= flags;
419         if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
420                 fs_info->quota_enabled = 0;
421                 fs_info->pending_quota_state = 0;
422         } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
423                    ret >= 0) {
424                 ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
425         }
426         btrfs_free_path(path);
427
428         if (ret < 0) {
429                 ulist_free(fs_info->qgroup_ulist);
430                 fs_info->qgroup_ulist = NULL;
431                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
432         }
433
434         return ret < 0 ? ret : 0;
435 }
436
437 /*
438  * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
439  * first two are in single-threaded paths.And for the third one, we have set
440  * quota_root to be null with qgroup_lock held before, so it is safe to clean
441  * up the in-memory structures without qgroup_lock held.
442  */
443 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
444 {
445         struct rb_node *n;
446         struct btrfs_qgroup *qgroup;
447
448         while ((n = rb_first(&fs_info->qgroup_tree))) {
449                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
450                 rb_erase(n, &fs_info->qgroup_tree);
451                 __del_qgroup_rb(qgroup);
452         }
453         /*
454          * we call btrfs_free_qgroup_config() when umounting
455          * filesystem and disabling quota, so we set qgroup_ulit
456          * to be null here to avoid double free.
457          */
458         ulist_free(fs_info->qgroup_ulist);
459         fs_info->qgroup_ulist = NULL;
460 }
461
462 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
463                                     struct btrfs_root *quota_root,
464                                     u64 src, u64 dst)
465 {
466         int ret;
467         struct btrfs_path *path;
468         struct btrfs_key key;
469
470         path = btrfs_alloc_path();
471         if (!path)
472                 return -ENOMEM;
473
474         key.objectid = src;
475         key.type = BTRFS_QGROUP_RELATION_KEY;
476         key.offset = dst;
477
478         ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
479
480         btrfs_mark_buffer_dirty(path->nodes[0]);
481
482         btrfs_free_path(path);
483         return ret;
484 }
485
486 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
487                                     struct btrfs_root *quota_root,
488                                     u64 src, u64 dst)
489 {
490         int ret;
491         struct btrfs_path *path;
492         struct btrfs_key key;
493
494         path = btrfs_alloc_path();
495         if (!path)
496                 return -ENOMEM;
497
498         key.objectid = src;
499         key.type = BTRFS_QGROUP_RELATION_KEY;
500         key.offset = dst;
501
502         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
503         if (ret < 0)
504                 goto out;
505
506         if (ret > 0) {
507                 ret = -ENOENT;
508                 goto out;
509         }
510
511         ret = btrfs_del_item(trans, quota_root, path);
512 out:
513         btrfs_free_path(path);
514         return ret;
515 }
516
517 static int add_qgroup_item(struct btrfs_trans_handle *trans,
518                            struct btrfs_root *quota_root, u64 qgroupid)
519 {
520         int ret;
521         struct btrfs_path *path;
522         struct btrfs_qgroup_info_item *qgroup_info;
523         struct btrfs_qgroup_limit_item *qgroup_limit;
524         struct extent_buffer *leaf;
525         struct btrfs_key key;
526
527         path = btrfs_alloc_path();
528         if (!path)
529                 return -ENOMEM;
530
531         key.objectid = 0;
532         key.type = BTRFS_QGROUP_INFO_KEY;
533         key.offset = qgroupid;
534
535         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
536                                       sizeof(*qgroup_info));
537         if (ret)
538                 goto out;
539
540         leaf = path->nodes[0];
541         qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
542                                  struct btrfs_qgroup_info_item);
543         btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
544         btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
545         btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
546         btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
547         btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
548
549         btrfs_mark_buffer_dirty(leaf);
550
551         btrfs_release_path(path);
552
553         key.type = BTRFS_QGROUP_LIMIT_KEY;
554         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
555                                       sizeof(*qgroup_limit));
556         if (ret)
557                 goto out;
558
559         leaf = path->nodes[0];
560         qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
561                                   struct btrfs_qgroup_limit_item);
562         btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
563         btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
564         btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
565         btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
566         btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
567
568         btrfs_mark_buffer_dirty(leaf);
569
570         ret = 0;
571 out:
572         btrfs_free_path(path);
573         return ret;
574 }
575
576 static int del_qgroup_item(struct btrfs_trans_handle *trans,
577                            struct btrfs_root *quota_root, u64 qgroupid)
578 {
579         int ret;
580         struct btrfs_path *path;
581         struct btrfs_key key;
582
583         path = btrfs_alloc_path();
584         if (!path)
585                 return -ENOMEM;
586
587         key.objectid = 0;
588         key.type = BTRFS_QGROUP_INFO_KEY;
589         key.offset = qgroupid;
590         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
591         if (ret < 0)
592                 goto out;
593
594         if (ret > 0) {
595                 ret = -ENOENT;
596                 goto out;
597         }
598
599         ret = btrfs_del_item(trans, quota_root, path);
600         if (ret)
601                 goto out;
602
603         btrfs_release_path(path);
604
605         key.type = BTRFS_QGROUP_LIMIT_KEY;
606         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
607         if (ret < 0)
608                 goto out;
609
610         if (ret > 0) {
611                 ret = -ENOENT;
612                 goto out;
613         }
614
615         ret = btrfs_del_item(trans, quota_root, path);
616
617 out:
618         btrfs_free_path(path);
619         return ret;
620 }
621
622 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
623                                     struct btrfs_root *root, u64 qgroupid,
624                                     u64 flags, u64 max_rfer, u64 max_excl,
625                                     u64 rsv_rfer, u64 rsv_excl)
626 {
627         struct btrfs_path *path;
628         struct btrfs_key key;
629         struct extent_buffer *l;
630         struct btrfs_qgroup_limit_item *qgroup_limit;
631         int ret;
632         int slot;
633
634         key.objectid = 0;
635         key.type = BTRFS_QGROUP_LIMIT_KEY;
636         key.offset = qgroupid;
637
638         path = btrfs_alloc_path();
639         if (!path)
640                 return -ENOMEM;
641
642         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
643         if (ret > 0)
644                 ret = -ENOENT;
645
646         if (ret)
647                 goto out;
648
649         l = path->nodes[0];
650         slot = path->slots[0];
651         qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
652         btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
653         btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
654         btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
655         btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
656         btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
657
658         btrfs_mark_buffer_dirty(l);
659
660 out:
661         btrfs_free_path(path);
662         return ret;
663 }
664
665 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
666                                    struct btrfs_root *root,
667                                    struct btrfs_qgroup *qgroup)
668 {
669         struct btrfs_path *path;
670         struct btrfs_key key;
671         struct extent_buffer *l;
672         struct btrfs_qgroup_info_item *qgroup_info;
673         int ret;
674         int slot;
675
676         key.objectid = 0;
677         key.type = BTRFS_QGROUP_INFO_KEY;
678         key.offset = qgroup->qgroupid;
679
680         path = btrfs_alloc_path();
681         if (!path)
682                 return -ENOMEM;
683
684         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
685         if (ret > 0)
686                 ret = -ENOENT;
687
688         if (ret)
689                 goto out;
690
691         l = path->nodes[0];
692         slot = path->slots[0];
693         qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
694         btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
695         btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
696         btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
697         btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
698         btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
699
700         btrfs_mark_buffer_dirty(l);
701
702 out:
703         btrfs_free_path(path);
704         return ret;
705 }
706
707 static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
708                                      struct btrfs_fs_info *fs_info,
709                                     struct btrfs_root *root)
710 {
711         struct btrfs_path *path;
712         struct btrfs_key key;
713         struct extent_buffer *l;
714         struct btrfs_qgroup_status_item *ptr;
715         int ret;
716         int slot;
717
718         key.objectid = 0;
719         key.type = BTRFS_QGROUP_STATUS_KEY;
720         key.offset = 0;
721
722         path = btrfs_alloc_path();
723         if (!path)
724                 return -ENOMEM;
725
726         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
727         if (ret > 0)
728                 ret = -ENOENT;
729
730         if (ret)
731                 goto out;
732
733         l = path->nodes[0];
734         slot = path->slots[0];
735         ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
736         btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
737         btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
738         btrfs_set_qgroup_status_rescan(l, ptr,
739                                 fs_info->qgroup_rescan_progress.objectid);
740
741         btrfs_mark_buffer_dirty(l);
742
743 out:
744         btrfs_free_path(path);
745         return ret;
746 }
747
748 /*
749  * called with qgroup_lock held
750  */
751 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
752                                   struct btrfs_root *root)
753 {
754         struct btrfs_path *path;
755         struct btrfs_key key;
756         struct extent_buffer *leaf = NULL;
757         int ret;
758         int nr = 0;
759
760         path = btrfs_alloc_path();
761         if (!path)
762                 return -ENOMEM;
763
764         path->leave_spinning = 1;
765
766         key.objectid = 0;
767         key.offset = 0;
768         key.type = 0;
769
770         while (1) {
771                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
772                 if (ret < 0)
773                         goto out;
774                 leaf = path->nodes[0];
775                 nr = btrfs_header_nritems(leaf);
776                 if (!nr)
777                         break;
778                 /*
779                  * delete the leaf one by one
780                  * since the whole tree is going
781                  * to be deleted.
782                  */
783                 path->slots[0] = 0;
784                 ret = btrfs_del_items(trans, root, path, 0, nr);
785                 if (ret)
786                         goto out;
787
788                 btrfs_release_path(path);
789         }
790         ret = 0;
791 out:
792         root->fs_info->pending_quota_state = 0;
793         btrfs_free_path(path);
794         return ret;
795 }
796
797 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
798                        struct btrfs_fs_info *fs_info)
799 {
800         struct btrfs_root *quota_root;
801         struct btrfs_root *tree_root = fs_info->tree_root;
802         struct btrfs_path *path = NULL;
803         struct btrfs_qgroup_status_item *ptr;
804         struct extent_buffer *leaf;
805         struct btrfs_key key;
806         struct btrfs_key found_key;
807         struct btrfs_qgroup *qgroup = NULL;
808         int ret = 0;
809         int slot;
810
811         mutex_lock(&fs_info->qgroup_ioctl_lock);
812         if (fs_info->quota_root) {
813                 fs_info->pending_quota_state = 1;
814                 goto out;
815         }
816
817         fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
818         if (!fs_info->qgroup_ulist) {
819                 ret = -ENOMEM;
820                 goto out;
821         }
822
823         /*
824          * initially create the quota tree
825          */
826         quota_root = btrfs_create_tree(trans, fs_info,
827                                        BTRFS_QUOTA_TREE_OBJECTID);
828         if (IS_ERR(quota_root)) {
829                 ret =  PTR_ERR(quota_root);
830                 goto out;
831         }
832
833         path = btrfs_alloc_path();
834         if (!path) {
835                 ret = -ENOMEM;
836                 goto out_free_root;
837         }
838
839         key.objectid = 0;
840         key.type = BTRFS_QGROUP_STATUS_KEY;
841         key.offset = 0;
842
843         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
844                                       sizeof(*ptr));
845         if (ret)
846                 goto out_free_path;
847
848         leaf = path->nodes[0];
849         ptr = btrfs_item_ptr(leaf, path->slots[0],
850                                  struct btrfs_qgroup_status_item);
851         btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
852         btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
853         fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
854                                 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
855         btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
856         btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
857
858         btrfs_mark_buffer_dirty(leaf);
859
860         key.objectid = 0;
861         key.type = BTRFS_ROOT_REF_KEY;
862         key.offset = 0;
863
864         btrfs_release_path(path);
865         ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
866         if (ret > 0)
867                 goto out_add_root;
868         if (ret < 0)
869                 goto out_free_path;
870
871
872         while (1) {
873                 slot = path->slots[0];
874                 leaf = path->nodes[0];
875                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
876
877                 if (found_key.type == BTRFS_ROOT_REF_KEY) {
878                         ret = add_qgroup_item(trans, quota_root,
879                                               found_key.offset);
880                         if (ret)
881                                 goto out_free_path;
882
883                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
884                         if (IS_ERR(qgroup)) {
885                                 ret = PTR_ERR(qgroup);
886                                 goto out_free_path;
887                         }
888                 }
889                 ret = btrfs_next_item(tree_root, path);
890                 if (ret < 0)
891                         goto out_free_path;
892                 if (ret)
893                         break;
894         }
895
896 out_add_root:
897         btrfs_release_path(path);
898         ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
899         if (ret)
900                 goto out_free_path;
901
902         qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
903         if (IS_ERR(qgroup)) {
904                 ret = PTR_ERR(qgroup);
905                 goto out_free_path;
906         }
907         spin_lock(&fs_info->qgroup_lock);
908         fs_info->quota_root = quota_root;
909         fs_info->pending_quota_state = 1;
910         spin_unlock(&fs_info->qgroup_lock);
911 out_free_path:
912         btrfs_free_path(path);
913 out_free_root:
914         if (ret) {
915                 free_extent_buffer(quota_root->node);
916                 free_extent_buffer(quota_root->commit_root);
917                 kfree(quota_root);
918         }
919 out:
920         if (ret) {
921                 ulist_free(fs_info->qgroup_ulist);
922                 fs_info->qgroup_ulist = NULL;
923         }
924         mutex_unlock(&fs_info->qgroup_ioctl_lock);
925         return ret;
926 }
927
928 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
929                         struct btrfs_fs_info *fs_info)
930 {
931         struct btrfs_root *tree_root = fs_info->tree_root;
932         struct btrfs_root *quota_root;
933         int ret = 0;
934
935         mutex_lock(&fs_info->qgroup_ioctl_lock);
936         if (!fs_info->quota_root)
937                 goto out;
938         spin_lock(&fs_info->qgroup_lock);
939         fs_info->quota_enabled = 0;
940         fs_info->pending_quota_state = 0;
941         quota_root = fs_info->quota_root;
942         fs_info->quota_root = NULL;
943         spin_unlock(&fs_info->qgroup_lock);
944
945         btrfs_free_qgroup_config(fs_info);
946
947         ret = btrfs_clean_quota_tree(trans, quota_root);
948         if (ret)
949                 goto out;
950
951         ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
952         if (ret)
953                 goto out;
954
955         list_del(&quota_root->dirty_list);
956
957         btrfs_tree_lock(quota_root->node);
958         clean_tree_block(trans, tree_root, quota_root->node);
959         btrfs_tree_unlock(quota_root->node);
960         btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
961
962         free_extent_buffer(quota_root->node);
963         free_extent_buffer(quota_root->commit_root);
964         kfree(quota_root);
965 out:
966         mutex_unlock(&fs_info->qgroup_ioctl_lock);
967         return ret;
968 }
969
970 static void qgroup_dirty(struct btrfs_fs_info *fs_info,
971                          struct btrfs_qgroup *qgroup)
972 {
973         if (list_empty(&qgroup->dirty))
974                 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
975 }
976
977 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
978                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
979 {
980         struct btrfs_root *quota_root;
981         struct btrfs_qgroup *parent;
982         struct btrfs_qgroup *member;
983         struct btrfs_qgroup_list *list;
984         int ret = 0;
985
986         mutex_lock(&fs_info->qgroup_ioctl_lock);
987         quota_root = fs_info->quota_root;
988         if (!quota_root) {
989                 ret = -EINVAL;
990                 goto out;
991         }
992         member = find_qgroup_rb(fs_info, src);
993         parent = find_qgroup_rb(fs_info, dst);
994         if (!member || !parent) {
995                 ret = -EINVAL;
996                 goto out;
997         }
998
999         /* check if such qgroup relation exist firstly */
1000         list_for_each_entry(list, &member->groups, next_group) {
1001                 if (list->group == parent) {
1002                         ret = -EEXIST;
1003                         goto out;
1004                 }
1005         }
1006
1007         ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1008         if (ret)
1009                 goto out;
1010
1011         ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1012         if (ret) {
1013                 del_qgroup_relation_item(trans, quota_root, src, dst);
1014                 goto out;
1015         }
1016
1017         spin_lock(&fs_info->qgroup_lock);
1018         ret = add_relation_rb(quota_root->fs_info, src, dst);
1019         spin_unlock(&fs_info->qgroup_lock);
1020 out:
1021         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1022         return ret;
1023 }
1024
1025 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1026                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1027 {
1028         struct btrfs_root *quota_root;
1029         struct btrfs_qgroup *parent;
1030         struct btrfs_qgroup *member;
1031         struct btrfs_qgroup_list *list;
1032         int ret = 0;
1033         int err;
1034
1035         mutex_lock(&fs_info->qgroup_ioctl_lock);
1036         quota_root = fs_info->quota_root;
1037         if (!quota_root) {
1038                 ret = -EINVAL;
1039                 goto out;
1040         }
1041
1042         member = find_qgroup_rb(fs_info, src);
1043         parent = find_qgroup_rb(fs_info, dst);
1044         if (!member || !parent) {
1045                 ret = -EINVAL;
1046                 goto out;
1047         }
1048
1049         /* check if such qgroup relation exist firstly */
1050         list_for_each_entry(list, &member->groups, next_group) {
1051                 if (list->group == parent)
1052                         goto exist;
1053         }
1054         ret = -ENOENT;
1055         goto out;
1056 exist:
1057         ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1058         err = del_qgroup_relation_item(trans, quota_root, dst, src);
1059         if (err && !ret)
1060                 ret = err;
1061
1062         spin_lock(&fs_info->qgroup_lock);
1063         del_relation_rb(fs_info, src, dst);
1064         spin_unlock(&fs_info->qgroup_lock);
1065 out:
1066         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1067         return ret;
1068 }
1069
1070 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1071                         struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
1072 {
1073         struct btrfs_root *quota_root;
1074         struct btrfs_qgroup *qgroup;
1075         int ret = 0;
1076
1077         mutex_lock(&fs_info->qgroup_ioctl_lock);
1078         quota_root = fs_info->quota_root;
1079         if (!quota_root) {
1080                 ret = -EINVAL;
1081                 goto out;
1082         }
1083         qgroup = find_qgroup_rb(fs_info, qgroupid);
1084         if (qgroup) {
1085                 ret = -EEXIST;
1086                 goto out;
1087         }
1088
1089         ret = add_qgroup_item(trans, quota_root, qgroupid);
1090         if (ret)
1091                 goto out;
1092
1093         spin_lock(&fs_info->qgroup_lock);
1094         qgroup = add_qgroup_rb(fs_info, qgroupid);
1095         spin_unlock(&fs_info->qgroup_lock);
1096
1097         if (IS_ERR(qgroup))
1098                 ret = PTR_ERR(qgroup);
1099 out:
1100         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1101         return ret;
1102 }
1103
1104 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1105                         struct btrfs_fs_info *fs_info, u64 qgroupid)
1106 {
1107         struct btrfs_root *quota_root;
1108         struct btrfs_qgroup *qgroup;
1109         int ret = 0;
1110
1111         mutex_lock(&fs_info->qgroup_ioctl_lock);
1112         quota_root = fs_info->quota_root;
1113         if (!quota_root) {
1114                 ret = -EINVAL;
1115                 goto out;
1116         }
1117
1118         qgroup = find_qgroup_rb(fs_info, qgroupid);
1119         if (!qgroup) {
1120                 ret = -ENOENT;
1121                 goto out;
1122         } else {
1123                 /* check if there are no relations to this qgroup */
1124                 if (!list_empty(&qgroup->groups) ||
1125                     !list_empty(&qgroup->members)) {
1126                         ret = -EBUSY;
1127                         goto out;
1128                 }
1129         }
1130         ret = del_qgroup_item(trans, quota_root, qgroupid);
1131
1132         spin_lock(&fs_info->qgroup_lock);
1133         del_qgroup_rb(quota_root->fs_info, qgroupid);
1134         spin_unlock(&fs_info->qgroup_lock);
1135 out:
1136         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1137         return ret;
1138 }
1139
1140 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1141                        struct btrfs_fs_info *fs_info, u64 qgroupid,
1142                        struct btrfs_qgroup_limit *limit)
1143 {
1144         struct btrfs_root *quota_root;
1145         struct btrfs_qgroup *qgroup;
1146         int ret = 0;
1147
1148         mutex_lock(&fs_info->qgroup_ioctl_lock);
1149         quota_root = fs_info->quota_root;
1150         if (!quota_root) {
1151                 ret = -EINVAL;
1152                 goto out;
1153         }
1154
1155         qgroup = find_qgroup_rb(fs_info, qgroupid);
1156         if (!qgroup) {
1157                 ret = -ENOENT;
1158                 goto out;
1159         }
1160         ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
1161                                        limit->flags, limit->max_rfer,
1162                                        limit->max_excl, limit->rsv_rfer,
1163                                        limit->rsv_excl);
1164         if (ret) {
1165                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1166                 btrfs_info(fs_info, "unable to update quota limit for %llu",
1167                        qgroupid);
1168         }
1169
1170         spin_lock(&fs_info->qgroup_lock);
1171         qgroup->lim_flags = limit->flags;
1172         qgroup->max_rfer = limit->max_rfer;
1173         qgroup->max_excl = limit->max_excl;
1174         qgroup->rsv_rfer = limit->rsv_rfer;
1175         qgroup->rsv_excl = limit->rsv_excl;
1176         spin_unlock(&fs_info->qgroup_lock);
1177 out:
1178         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1179         return ret;
1180 }
1181 static int comp_oper(struct btrfs_qgroup_operation *oper1,
1182                      struct btrfs_qgroup_operation *oper2)
1183 {
1184         if (oper1->bytenr < oper2->bytenr)
1185                 return -1;
1186         if (oper1->bytenr > oper2->bytenr)
1187                 return 1;
1188         if (oper1->seq < oper2->seq)
1189                 return -1;
1190         if (oper1->seq > oper2->seq)
1191                 return -1;
1192         if (oper1->ref_root < oper2->ref_root)
1193                 return -1;
1194         if (oper1->ref_root > oper2->ref_root)
1195                 return 1;
1196         if (oper1->type < oper2->type)
1197                 return -1;
1198         if (oper1->type > oper2->type)
1199                 return 1;
1200         return 0;
1201 }
1202
1203 static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
1204                               struct btrfs_qgroup_operation *oper)
1205 {
1206         struct rb_node **p;
1207         struct rb_node *parent = NULL;
1208         struct btrfs_qgroup_operation *cur;
1209         int cmp;
1210
1211         spin_lock(&fs_info->qgroup_op_lock);
1212         p = &fs_info->qgroup_op_tree.rb_node;
1213         while (*p) {
1214                 parent = *p;
1215                 cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
1216                 cmp = comp_oper(cur, oper);
1217                 if (cmp < 0) {
1218                         p = &(*p)->rb_right;
1219                 } else if (cmp) {
1220                         p = &(*p)->rb_left;
1221                 } else {
1222                         spin_unlock(&fs_info->qgroup_op_lock);
1223                         return -EEXIST;
1224                 }
1225         }
1226         rb_link_node(&oper->n, parent, p);
1227         rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
1228         spin_unlock(&fs_info->qgroup_op_lock);
1229         return 0;
1230 }
1231
1232 /*
1233  * Record a quota operation for processing later on.
1234  * @trans: the transaction we are adding the delayed op to.
1235  * @fs_info: the fs_info for this fs.
1236  * @ref_root: the root of the reference we are acting on,
1237  * @bytenr: the bytenr we are acting on.
1238  * @num_bytes: the number of bytes in the reference.
1239  * @type: the type of operation this is.
1240  * @mod_seq: do we need to get a sequence number for looking up roots.
1241  *
1242  * We just add it to our trans qgroup_ref_list and carry on and process these
1243  * operations in order at some later point.  If the reference root isn't a fs
1244  * root then we don't bother with doing anything.
1245  *
1246  * MUST BE HOLDING THE REF LOCK.
1247  */
1248 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1249                             struct btrfs_fs_info *fs_info, u64 ref_root,
1250                             u64 bytenr, u64 num_bytes,
1251                             enum btrfs_qgroup_operation_type type, int mod_seq)
1252 {
1253         struct btrfs_qgroup_operation *oper;
1254         int ret;
1255
1256         if (!is_fstree(ref_root) || !fs_info->quota_enabled)
1257                 return 0;
1258
1259         oper = kmalloc(sizeof(*oper), GFP_NOFS);
1260         if (!oper)
1261                 return -ENOMEM;
1262
1263         oper->ref_root = ref_root;
1264         oper->bytenr = bytenr;
1265         oper->num_bytes = num_bytes;
1266         oper->type = type;
1267         oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
1268         INIT_LIST_HEAD(&oper->elem.list);
1269         oper->elem.seq = 0;
1270         ret = insert_qgroup_oper(fs_info, oper);
1271         if (ret) {
1272                 /* Shouldn't happen so have an assert for developers */
1273                 ASSERT(0);
1274                 kfree(oper);
1275                 return ret;
1276         }
1277         list_add_tail(&oper->list, &trans->qgroup_ref_list);
1278
1279         if (mod_seq)
1280                 btrfs_get_tree_mod_seq(fs_info, &oper->elem);
1281
1282         return 0;
1283 }
1284
1285 /*
1286  * The easy accounting, if we are adding/removing the only ref for an extent
1287  * then this qgroup and all of the parent qgroups get their refrence and
1288  * exclusive counts adjusted.
1289  */
1290 static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1291                                   struct btrfs_qgroup_operation *oper)
1292 {
1293         struct btrfs_qgroup *qgroup;
1294         struct ulist *tmp;
1295         struct btrfs_qgroup_list *glist;
1296         struct ulist_node *unode;
1297         struct ulist_iterator uiter;
1298         int sign = 0;
1299         int ret = 0;
1300
1301         tmp = ulist_alloc(GFP_NOFS);
1302         if (!tmp)
1303                 return -ENOMEM;
1304
1305         spin_lock(&fs_info->qgroup_lock);
1306         if (!fs_info->quota_root)
1307                 goto out;
1308         qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1309         if (!qgroup)
1310                 goto out;
1311         switch (oper->type) {
1312         case BTRFS_QGROUP_OPER_ADD_EXCL:
1313                 sign = 1;
1314                 break;
1315         case BTRFS_QGROUP_OPER_SUB_EXCL:
1316                 sign = -1;
1317                 break;
1318         default:
1319                 ASSERT(0);
1320         }
1321         qgroup->rfer += sign * oper->num_bytes;
1322         qgroup->rfer_cmpr += sign * oper->num_bytes;
1323
1324         WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1325         qgroup->excl += sign * oper->num_bytes;
1326         qgroup->excl_cmpr += sign * oper->num_bytes;
1327
1328         qgroup_dirty(fs_info, qgroup);
1329
1330         /* Get all of the parent groups that contain this qgroup */
1331         list_for_each_entry(glist, &qgroup->groups, next_group) {
1332                 ret = ulist_add(tmp, glist->group->qgroupid,
1333                                 ptr_to_u64(glist->group), GFP_ATOMIC);
1334                 if (ret < 0)
1335                         goto out;
1336         }
1337
1338         /* Iterate all of the parents and adjust their reference counts */
1339         ULIST_ITER_INIT(&uiter);
1340         while ((unode = ulist_next(tmp, &uiter))) {
1341                 qgroup = u64_to_ptr(unode->aux);
1342                 qgroup->rfer += sign * oper->num_bytes;
1343                 qgroup->rfer_cmpr += sign * oper->num_bytes;
1344                 qgroup->excl += sign * oper->num_bytes;
1345                 if (sign < 0)
1346                         WARN_ON(qgroup->excl < oper->num_bytes);
1347                 qgroup->excl_cmpr += sign * oper->num_bytes;
1348                 qgroup_dirty(fs_info, qgroup);
1349
1350                 /* Add any parents of the parents */
1351                 list_for_each_entry(glist, &qgroup->groups, next_group) {
1352                         ret = ulist_add(tmp, glist->group->qgroupid,
1353                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1354                         if (ret < 0)
1355                                 goto out;
1356                 }
1357         }
1358         ret = 0;
1359 out:
1360         spin_unlock(&fs_info->qgroup_lock);
1361         ulist_free(tmp);
1362         return ret;
1363 }
1364
1365 /*
1366  * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
1367  * properly.
1368  */
1369 static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
1370                                   u64 root_to_skip, struct ulist *tmp,
1371                                   struct ulist *roots, struct ulist *qgroups,
1372                                   u64 seq, int *old_roots, int rescan)
1373 {
1374         struct ulist_node *unode;
1375         struct ulist_iterator uiter;
1376         struct ulist_node *tmp_unode;
1377         struct ulist_iterator tmp_uiter;
1378         struct btrfs_qgroup *qg;
1379         int ret;
1380
1381         ULIST_ITER_INIT(&uiter);
1382         while ((unode = ulist_next(roots, &uiter))) {
1383                 /* We don't count our current root here */
1384                 if (unode->val == root_to_skip)
1385                         continue;
1386                 qg = find_qgroup_rb(fs_info, unode->val);
1387                 if (!qg)
1388                         continue;
1389                 /*
1390                  * We could have a pending removal of this same ref so we may
1391                  * not have actually found our ref root when doing
1392                  * btrfs_find_all_roots, so we need to keep track of how many
1393                  * old roots we find in case we removed ours and added a
1394                  * different one at the same time.  I don't think this could
1395                  * happen in practice but that sort of thinking leads to pain
1396                  * and suffering and to the dark side.
1397                  */
1398                 (*old_roots)++;
1399
1400                 ulist_reinit(tmp);
1401                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1402                                 GFP_ATOMIC);
1403                 if (ret < 0)
1404                         return ret;
1405                 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
1406                 if (ret < 0)
1407                         return ret;
1408                 ULIST_ITER_INIT(&tmp_uiter);
1409                 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1410                         struct btrfs_qgroup_list *glist;
1411
1412                         qg = u64_to_ptr(tmp_unode->aux);
1413                         /*
1414                          * We use this sequence number to keep from having to
1415                          * run the whole list and 0 out the refcnt every time.
1416                          * We basically use sequnce as the known 0 count and
1417                          * then add 1 everytime we see a qgroup.  This is how we
1418                          * get how many of the roots actually point up to the
1419                          * upper level qgroups in order to determine exclusive
1420                          * counts.
1421                          *
1422                          * For rescan we want to set old_refcnt to seq so our
1423                          * exclusive calculations end up correct.
1424                          */
1425                         if (rescan)
1426                                 qg->old_refcnt = seq;
1427                         else if (qg->old_refcnt < seq)
1428                                 qg->old_refcnt = seq + 1;
1429                         else
1430                                 qg->old_refcnt++;
1431
1432                         if (qg->new_refcnt < seq)
1433                                 qg->new_refcnt = seq + 1;
1434                         else
1435                                 qg->new_refcnt++;
1436                         list_for_each_entry(glist, &qg->groups, next_group) {
1437                                 ret = ulist_add(qgroups, glist->group->qgroupid,
1438                                                 ptr_to_u64(glist->group),
1439                                                 GFP_ATOMIC);
1440                                 if (ret < 0)
1441                                         return ret;
1442                                 ret = ulist_add(tmp, glist->group->qgroupid,
1443                                                 ptr_to_u64(glist->group),
1444                                                 GFP_ATOMIC);
1445                                 if (ret < 0)
1446                                         return ret;
1447                         }
1448                 }
1449         }
1450         return 0;
1451 }
1452
1453 /*
1454  * We need to walk forward in our operation tree and account for any roots that
1455  * were deleted after we made this operation.
1456  */
1457 static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
1458                                        struct btrfs_qgroup_operation *oper,
1459                                        struct ulist *tmp,
1460                                        struct ulist *qgroups, u64 seq,
1461                                        int *old_roots)
1462 {
1463         struct ulist_node *unode;
1464         struct ulist_iterator uiter;
1465         struct btrfs_qgroup *qg;
1466         struct btrfs_qgroup_operation *tmp_oper;
1467         struct rb_node *n;
1468         int ret;
1469
1470         ulist_reinit(tmp);
1471
1472         /*
1473          * We only walk forward in the tree since we're only interested in
1474          * removals that happened _after_  our operation.
1475          */
1476         spin_lock(&fs_info->qgroup_op_lock);
1477         n = rb_next(&oper->n);
1478         spin_unlock(&fs_info->qgroup_op_lock);
1479         if (!n)
1480                 return 0;
1481         tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1482         while (tmp_oper->bytenr == oper->bytenr) {
1483                 /*
1484                  * If it's not a removal we don't care, additions work out
1485                  * properly with our refcnt tracking.
1486                  */
1487                 if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
1488                     tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
1489                         goto next;
1490                 qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
1491                 if (!qg)
1492                         goto next;
1493                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1494                                 GFP_ATOMIC);
1495                 if (ret) {
1496                         if (ret < 0)
1497                                 return ret;
1498                         /*
1499                          * We only want to increase old_roots if this qgroup is
1500                          * not already in the list of qgroups.  If it is already
1501                          * there then that means it must have been re-added or
1502                          * the delete will be discarded because we had an
1503                          * existing ref that we haven't looked up yet.  In this
1504                          * case we don't want to increase old_roots.  So if ret
1505                          * == 1 then we know that this is the first time we've
1506                          * seen this qgroup and we can bump the old_roots.
1507                          */
1508                         (*old_roots)++;
1509                         ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
1510                                         GFP_ATOMIC);
1511                         if (ret < 0)
1512                                 return ret;
1513                 }
1514 next:
1515                 spin_lock(&fs_info->qgroup_op_lock);
1516                 n = rb_next(&tmp_oper->n);
1517                 spin_unlock(&fs_info->qgroup_op_lock);
1518                 if (!n)
1519                         break;
1520                 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1521         }
1522
1523         /* Ok now process the qgroups we found */
1524         ULIST_ITER_INIT(&uiter);
1525         while ((unode = ulist_next(tmp, &uiter))) {
1526                 struct btrfs_qgroup_list *glist;
1527
1528                 qg = u64_to_ptr(unode->aux);
1529                 if (qg->old_refcnt < seq)
1530                         qg->old_refcnt = seq + 1;
1531                 else
1532                         qg->old_refcnt++;
1533                 if (qg->new_refcnt < seq)
1534                         qg->new_refcnt = seq + 1;
1535                 else
1536                         qg->new_refcnt++;
1537                 list_for_each_entry(glist, &qg->groups, next_group) {
1538                         ret = ulist_add(qgroups, glist->group->qgroupid,
1539                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1540                         if (ret < 0)
1541                                 return ret;
1542                         ret = ulist_add(tmp, glist->group->qgroupid,
1543                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1544                         if (ret < 0)
1545                                 return ret;
1546                 }
1547         }
1548         return 0;
1549 }
1550
1551 /* Add refcnt for the newly added reference. */
1552 static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
1553                                   struct btrfs_qgroup_operation *oper,
1554                                   struct btrfs_qgroup *qgroup,
1555                                   struct ulist *tmp, struct ulist *qgroups,
1556                                   u64 seq)
1557 {
1558         struct ulist_node *unode;
1559         struct ulist_iterator uiter;
1560         struct btrfs_qgroup *qg;
1561         int ret;
1562
1563         ulist_reinit(tmp);
1564         ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
1565                         GFP_ATOMIC);
1566         if (ret < 0)
1567                 return ret;
1568         ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
1569                         GFP_ATOMIC);
1570         if (ret < 0)
1571                 return ret;
1572         ULIST_ITER_INIT(&uiter);
1573         while ((unode = ulist_next(tmp, &uiter))) {
1574                 struct btrfs_qgroup_list *glist;
1575
1576                 qg = u64_to_ptr(unode->aux);
1577                 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1578                         if (qg->new_refcnt < seq)
1579                                 qg->new_refcnt = seq + 1;
1580                         else
1581                                 qg->new_refcnt++;
1582                 } else {
1583                         if (qg->old_refcnt < seq)
1584                                 qg->old_refcnt = seq + 1;
1585                         else
1586                                 qg->old_refcnt++;
1587                 }
1588                 list_for_each_entry(glist, &qg->groups, next_group) {
1589                         ret = ulist_add(tmp, glist->group->qgroupid,
1590                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1591                         if (ret < 0)
1592                                 return ret;
1593                         ret = ulist_add(qgroups, glist->group->qgroupid,
1594                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1595                         if (ret < 0)
1596                                 return ret;
1597                 }
1598         }
1599         return 0;
1600 }
1601
1602 /*
1603  * This adjusts the counters for all referenced qgroups if need be.
1604  */
1605 static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
1606                                   u64 root_to_skip, u64 num_bytes,
1607                                   struct ulist *qgroups, u64 seq,
1608                                   int old_roots, int new_roots, int rescan)
1609 {
1610         struct ulist_node *unode;
1611         struct ulist_iterator uiter;
1612         struct btrfs_qgroup *qg;
1613         u64 cur_new_count, cur_old_count;
1614
1615         ULIST_ITER_INIT(&uiter);
1616         while ((unode = ulist_next(qgroups, &uiter))) {
1617                 bool dirty = false;
1618
1619                 qg = u64_to_ptr(unode->aux);
1620                 /*
1621                  * Wasn't referenced before but is now, add to the reference
1622                  * counters.
1623                  */
1624                 if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
1625                         qg->rfer += num_bytes;
1626                         qg->rfer_cmpr += num_bytes;
1627                         dirty = true;
1628                 }
1629
1630                 /*
1631                  * Was referenced before but isn't now, subtract from the
1632                  * reference counters.
1633                  */
1634                 if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
1635                         qg->rfer -= num_bytes;
1636                         qg->rfer_cmpr -= num_bytes;
1637                         dirty = true;
1638                 }
1639
1640                 if (qg->old_refcnt < seq)
1641                         cur_old_count = 0;
1642                 else
1643                         cur_old_count = qg->old_refcnt - seq;
1644                 if (qg->new_refcnt < seq)
1645                         cur_new_count = 0;
1646                 else
1647                         cur_new_count = qg->new_refcnt - seq;
1648
1649                 /*
1650                  * If our refcount was the same as the roots previously but our
1651                  * new count isn't the same as the number of roots now then we
1652                  * went from having a exclusive reference on this range to not.
1653                  */
1654                 if (old_roots && cur_old_count == old_roots &&
1655                     (cur_new_count != new_roots || new_roots == 0)) {
1656                         WARN_ON(cur_new_count != new_roots && new_roots == 0);
1657                         qg->excl -= num_bytes;
1658                         qg->excl_cmpr -= num_bytes;
1659                         dirty = true;
1660                 }
1661
1662                 /*
1663                  * If we didn't reference all the roots before but now we do we
1664                  * have an exclusive reference to this range.
1665                  */
1666                 if ((!old_roots || (old_roots && cur_old_count != old_roots))
1667                     && cur_new_count == new_roots) {
1668                         qg->excl += num_bytes;
1669                         qg->excl_cmpr += num_bytes;
1670                         dirty = true;
1671                 }
1672
1673                 if (dirty)
1674                         qgroup_dirty(fs_info, qg);
1675         }
1676         return 0;
1677 }
1678
1679 /*
1680  * If we removed a data extent and there were other references for that bytenr
1681  * then we need to lookup all referenced roots to make sure we still don't
1682  * reference this bytenr.  If we do then we can just discard this operation.
1683  */
1684 static int check_existing_refs(struct btrfs_trans_handle *trans,
1685                                struct btrfs_fs_info *fs_info,
1686                                struct btrfs_qgroup_operation *oper)
1687 {
1688         struct ulist *roots = NULL;
1689         struct ulist_node *unode;
1690         struct ulist_iterator uiter;
1691         int ret = 0;
1692
1693         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
1694                                    oper->elem.seq, &roots);
1695         if (ret < 0)
1696                 return ret;
1697         ret = 0;
1698
1699         ULIST_ITER_INIT(&uiter);
1700         while ((unode = ulist_next(roots, &uiter))) {
1701                 if (unode->val == oper->ref_root) {
1702                         ret = 1;
1703                         break;
1704                 }
1705         }
1706         ulist_free(roots);
1707         btrfs_put_tree_mod_seq(fs_info, &oper->elem);
1708
1709         return ret;
1710 }
1711
1712 /*
1713  * If we share a reference across multiple roots then we may need to adjust
1714  * various qgroups referenced and exclusive counters.  The basic premise is this
1715  *
1716  * 1) We have seq to represent a 0 count.  Instead of looping through all of the
1717  * qgroups and resetting their refcount to 0 we just constantly bump this
1718  * sequence number to act as the base reference count.  This means that if
1719  * anybody is equal to or below this sequence they were never referenced.  We
1720  * jack this sequence up by the number of roots we found each time in order to
1721  * make sure we don't have any overlap.
1722  *
1723  * 2) We first search all the roots that reference the area _except_ the root
1724  * we're acting on currently.  This makes up the old_refcnt of all the qgroups
1725  * before.
1726  *
1727  * 3) We walk all of the qgroups referenced by the root we are currently acting
1728  * on, and will either adjust old_refcnt in the case of a removal or the
1729  * new_refcnt in the case of an addition.
1730  *
1731  * 4) Finally we walk all the qgroups that are referenced by this range
1732  * including the root we are acting on currently.  We will adjust the counters
1733  * based on the number of roots we had and will have after this operation.
1734  *
1735  * Take this example as an illustration
1736  *
1737  *                      [qgroup 1/0]
1738  *                   /         |          \
1739  *              [qg 0/0]   [qg 0/1]     [qg 0/2]
1740  *                 \          |            /
1741  *                [        extent           ]
1742  *
1743  * Say we are adding a reference that is covered by qg 0/0.  The first step
1744  * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
1745  * old_roots being 2.  Because it is adding new_roots will be 1.  We then go
1746  * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
1747  * new_refcnt, bringing it to 3.  We then walk through all of the qgroups, we
1748  * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
1749  * reference and thus must add the size to the referenced bytes.  Everything
1750  * else is the same so nothing else changes.
1751  */
1752 static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
1753                                     struct btrfs_fs_info *fs_info,
1754                                     struct btrfs_qgroup_operation *oper)
1755 {
1756         struct ulist *roots = NULL;
1757         struct ulist *qgroups, *tmp;
1758         struct btrfs_qgroup *qgroup;
1759         struct seq_list elem = {};
1760         u64 seq;
1761         int old_roots = 0;
1762         int new_roots = 0;
1763         int ret = 0;
1764
1765         if (oper->elem.seq) {
1766                 ret = check_existing_refs(trans, fs_info, oper);
1767                 if (ret < 0)
1768                         return ret;
1769                 if (ret)
1770                         return 0;
1771         }
1772
1773         qgroups = ulist_alloc(GFP_NOFS);
1774         if (!qgroups)
1775                 return -ENOMEM;
1776
1777         tmp = ulist_alloc(GFP_NOFS);
1778         if (!tmp)
1779                 return -ENOMEM;
1780
1781         btrfs_get_tree_mod_seq(fs_info, &elem);
1782         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
1783                                    &roots);
1784         btrfs_put_tree_mod_seq(fs_info, &elem);
1785         if (ret < 0) {
1786                 ulist_free(qgroups);
1787                 ulist_free(tmp);
1788                 return ret;
1789         }
1790         spin_lock(&fs_info->qgroup_lock);
1791         qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1792         if (!qgroup)
1793                 goto out;
1794         seq = fs_info->qgroup_seq;
1795
1796         /*
1797          * So roots is the list of all the roots currently pointing at the
1798          * bytenr, including the ref we are adding if we are adding, or not if
1799          * we are removing a ref.  So we pass in the ref_root to skip that root
1800          * in our calculations.  We set old_refnct and new_refcnt cause who the
1801          * hell knows what everything looked like before, and it doesn't matter
1802          * except...
1803          */
1804         ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
1805                                      seq, &old_roots, 0);
1806         if (ret < 0)
1807                 goto out;
1808
1809         /*
1810          * Now adjust the refcounts of the qgroups that care about this
1811          * reference, either the old_count in the case of removal or new_count
1812          * in the case of an addition.
1813          */
1814         ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
1815                                      seq);
1816         if (ret < 0)
1817                 goto out;
1818
1819         /*
1820          * ...in the case of removals.  If we had a removal before we got around
1821          * to processing this operation then we need to find that guy and count
1822          * his references as if they really existed so we don't end up screwing
1823          * up the exclusive counts.  Then whenever we go to process the delete
1824          * everything will be grand and we can account for whatever exclusive
1825          * changes need to be made there.  We also have to pass in old_roots so
1826          * we have an accurate count of the roots as it pertains to this
1827          * operations view of the world.
1828          */
1829         ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
1830                                           &old_roots);
1831         if (ret < 0)
1832                 goto out;
1833
1834         /*
1835          * We are adding our root, need to adjust up the number of roots,
1836          * otherwise old_roots is the number of roots we want.
1837          */
1838         if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1839                 new_roots = old_roots + 1;
1840         } else {
1841                 new_roots = old_roots;
1842                 old_roots++;
1843         }
1844         fs_info->qgroup_seq += old_roots + 1;
1845
1846
1847         /*
1848          * And now the magic happens, bless Arne for having a pretty elegant
1849          * solution for this.
1850          */
1851         qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
1852                                qgroups, seq, old_roots, new_roots, 0);
1853 out:
1854         spin_unlock(&fs_info->qgroup_lock);
1855         ulist_free(qgroups);
1856         ulist_free(roots);
1857         ulist_free(tmp);
1858         return ret;
1859 }
1860
1861 /*
1862  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1863  * from the fs. First, all roots referencing the extent are searched, and
1864  * then the space is accounted accordingly to the different roots. The
1865  * accounting algorithm works in 3 steps documented inline.
1866  */
1867 static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
1868                                 struct btrfs_fs_info *fs_info,
1869                                 struct btrfs_qgroup_operation *oper)
1870 {
1871         int ret = 0;
1872
1873         if (!fs_info->quota_enabled)
1874                 return 0;
1875
1876         BUG_ON(!fs_info->quota_root);
1877
1878         mutex_lock(&fs_info->qgroup_rescan_lock);
1879         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1880                 if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
1881                         mutex_unlock(&fs_info->qgroup_rescan_lock);
1882                         return 0;
1883                 }
1884         }
1885         mutex_unlock(&fs_info->qgroup_rescan_lock);
1886
1887         ASSERT(is_fstree(oper->ref_root));
1888
1889         switch (oper->type) {
1890         case BTRFS_QGROUP_OPER_ADD_EXCL:
1891         case BTRFS_QGROUP_OPER_SUB_EXCL:
1892                 ret = qgroup_excl_accounting(fs_info, oper);
1893                 break;
1894         case BTRFS_QGROUP_OPER_ADD_SHARED:
1895         case BTRFS_QGROUP_OPER_SUB_SHARED:
1896                 ret = qgroup_shared_accounting(trans, fs_info, oper);
1897                 break;
1898         default:
1899                 ASSERT(0);
1900         }
1901         return ret;
1902 }
1903
1904 /*
1905  * Needs to be called everytime we run delayed refs, even if there is an error
1906  * in order to cleanup outstanding operations.
1907  */
1908 int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
1909                                     struct btrfs_fs_info *fs_info)
1910 {
1911         struct btrfs_qgroup_operation *oper;
1912         int ret = 0;
1913
1914         while (!list_empty(&trans->qgroup_ref_list)) {
1915                 oper = list_first_entry(&trans->qgroup_ref_list,
1916                                         struct btrfs_qgroup_operation, list);
1917                 list_del_init(&oper->list);
1918                 if (!ret || !trans->aborted)
1919                         ret = btrfs_qgroup_account(trans, fs_info, oper);
1920                 spin_lock(&fs_info->qgroup_op_lock);
1921                 rb_erase(&oper->n, &fs_info->qgroup_op_tree);
1922                 spin_unlock(&fs_info->qgroup_op_lock);
1923                 btrfs_put_tree_mod_seq(fs_info, &oper->elem);
1924                 kfree(oper);
1925         }
1926         return ret;
1927 }
1928
1929 /*
1930  * called from commit_transaction. Writes all changed qgroups to disk.
1931  */
1932 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
1933                       struct btrfs_fs_info *fs_info)
1934 {
1935         struct btrfs_root *quota_root = fs_info->quota_root;
1936         int ret = 0;
1937         int start_rescan_worker = 0;
1938
1939         if (!quota_root)
1940                 goto out;
1941
1942         if (!fs_info->quota_enabled && fs_info->pending_quota_state)
1943                 start_rescan_worker = 1;
1944
1945         fs_info->quota_enabled = fs_info->pending_quota_state;
1946
1947         spin_lock(&fs_info->qgroup_lock);
1948         while (!list_empty(&fs_info->dirty_qgroups)) {
1949                 struct btrfs_qgroup *qgroup;
1950                 qgroup = list_first_entry(&fs_info->dirty_qgroups,
1951                                           struct btrfs_qgroup, dirty);
1952                 list_del_init(&qgroup->dirty);
1953                 spin_unlock(&fs_info->qgroup_lock);
1954                 ret = update_qgroup_info_item(trans, quota_root, qgroup);
1955                 if (ret)
1956                         fs_info->qgroup_flags |=
1957                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1958                 spin_lock(&fs_info->qgroup_lock);
1959         }
1960         if (fs_info->quota_enabled)
1961                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
1962         else
1963                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
1964         spin_unlock(&fs_info->qgroup_lock);
1965
1966         ret = update_qgroup_status_item(trans, fs_info, quota_root);
1967         if (ret)
1968                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1969
1970         if (!ret && start_rescan_worker) {
1971                 ret = qgroup_rescan_init(fs_info, 0, 1);
1972                 if (!ret) {
1973                         qgroup_rescan_zero_tracking(fs_info);
1974                         btrfs_queue_work(fs_info->qgroup_rescan_workers,
1975                                          &fs_info->qgroup_rescan_work);
1976                 }
1977                 ret = 0;
1978         }
1979
1980 out:
1981
1982         return ret;
1983 }
1984
1985 /*
1986  * copy the acounting information between qgroups. This is necessary when a
1987  * snapshot or a subvolume is created
1988  */
1989 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1990                          struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
1991                          struct btrfs_qgroup_inherit *inherit)
1992 {
1993         int ret = 0;
1994         int i;
1995         u64 *i_qgroups;
1996         struct btrfs_root *quota_root = fs_info->quota_root;
1997         struct btrfs_qgroup *srcgroup;
1998         struct btrfs_qgroup *dstgroup;
1999         u32 level_size = 0;
2000         u64 nums;
2001
2002         mutex_lock(&fs_info->qgroup_ioctl_lock);
2003         if (!fs_info->quota_enabled)
2004                 goto out;
2005
2006         if (!quota_root) {
2007                 ret = -EINVAL;
2008                 goto out;
2009         }
2010
2011         if (inherit) {
2012                 i_qgroups = (u64 *)(inherit + 1);
2013                 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2014                        2 * inherit->num_excl_copies;
2015                 for (i = 0; i < nums; ++i) {
2016                         srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
2017                         if (!srcgroup) {
2018                                 ret = -EINVAL;
2019                                 goto out;
2020                         }
2021                         ++i_qgroups;
2022                 }
2023         }
2024
2025         /*
2026          * create a tracking group for the subvol itself
2027          */
2028         ret = add_qgroup_item(trans, quota_root, objectid);
2029         if (ret)
2030                 goto out;
2031
2032         if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2033                 ret = update_qgroup_limit_item(trans, quota_root, objectid,
2034                                                inherit->lim.flags,
2035                                                inherit->lim.max_rfer,
2036                                                inherit->lim.max_excl,
2037                                                inherit->lim.rsv_rfer,
2038                                                inherit->lim.rsv_excl);
2039                 if (ret)
2040                         goto out;
2041         }
2042
2043         if (srcid) {
2044                 struct btrfs_root *srcroot;
2045                 struct btrfs_key srckey;
2046                 int srcroot_level;
2047
2048                 srckey.objectid = srcid;
2049                 srckey.type = BTRFS_ROOT_ITEM_KEY;
2050                 srckey.offset = (u64)-1;
2051                 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
2052                 if (IS_ERR(srcroot)) {
2053                         ret = PTR_ERR(srcroot);
2054                         goto out;
2055                 }
2056
2057                 rcu_read_lock();
2058                 srcroot_level = btrfs_header_level(srcroot->node);
2059                 level_size = btrfs_level_size(srcroot, srcroot_level);
2060                 rcu_read_unlock();
2061         }
2062
2063         /*
2064          * add qgroup to all inherited groups
2065          */
2066         if (inherit) {
2067                 i_qgroups = (u64 *)(inherit + 1);
2068                 for (i = 0; i < inherit->num_qgroups; ++i) {
2069                         ret = add_qgroup_relation_item(trans, quota_root,
2070                                                        objectid, *i_qgroups);
2071                         if (ret)
2072                                 goto out;
2073                         ret = add_qgroup_relation_item(trans, quota_root,
2074                                                        *i_qgroups, objectid);
2075                         if (ret)
2076                                 goto out;
2077                         ++i_qgroups;
2078                 }
2079         }
2080
2081
2082         spin_lock(&fs_info->qgroup_lock);
2083
2084         dstgroup = add_qgroup_rb(fs_info, objectid);
2085         if (IS_ERR(dstgroup)) {
2086                 ret = PTR_ERR(dstgroup);
2087                 goto unlock;
2088         }
2089
2090         if (srcid) {
2091                 srcgroup = find_qgroup_rb(fs_info, srcid);
2092                 if (!srcgroup)
2093                         goto unlock;
2094
2095                 /*
2096                  * We call inherit after we clone the root in order to make sure
2097                  * our counts don't go crazy, so at this point the only
2098                  * difference between the two roots should be the root node.
2099                  */
2100                 dstgroup->rfer = srcgroup->rfer;
2101                 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2102                 dstgroup->excl = level_size;
2103                 dstgroup->excl_cmpr = level_size;
2104                 srcgroup->excl = level_size;
2105                 srcgroup->excl_cmpr = level_size;
2106                 qgroup_dirty(fs_info, dstgroup);
2107                 qgroup_dirty(fs_info, srcgroup);
2108         }
2109
2110         if (!inherit)
2111                 goto unlock;
2112
2113         i_qgroups = (u64 *)(inherit + 1);
2114         for (i = 0; i < inherit->num_qgroups; ++i) {
2115                 ret = add_relation_rb(quota_root->fs_info, objectid,
2116                                       *i_qgroups);
2117                 if (ret)
2118                         goto unlock;
2119                 ++i_qgroups;
2120         }
2121
2122         for (i = 0; i <  inherit->num_ref_copies; ++i) {
2123                 struct btrfs_qgroup *src;
2124                 struct btrfs_qgroup *dst;
2125
2126                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2127                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2128
2129                 if (!src || !dst) {
2130                         ret = -EINVAL;
2131                         goto unlock;
2132                 }
2133
2134                 dst->rfer = src->rfer - level_size;
2135                 dst->rfer_cmpr = src->rfer_cmpr - level_size;
2136                 i_qgroups += 2;
2137         }
2138         for (i = 0; i <  inherit->num_excl_copies; ++i) {
2139                 struct btrfs_qgroup *src;
2140                 struct btrfs_qgroup *dst;
2141
2142                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2143                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2144
2145                 if (!src || !dst) {
2146                         ret = -EINVAL;
2147                         goto unlock;
2148                 }
2149
2150                 dst->excl = src->excl + level_size;
2151                 dst->excl_cmpr = src->excl_cmpr + level_size;
2152                 i_qgroups += 2;
2153         }
2154
2155 unlock:
2156         spin_unlock(&fs_info->qgroup_lock);
2157 out:
2158         mutex_unlock(&fs_info->qgroup_ioctl_lock);
2159         return ret;
2160 }
2161
2162 /*
2163  * reserve some space for a qgroup and all its parents. The reservation takes
2164  * place with start_transaction or dealloc_reserve, similar to ENOSPC
2165  * accounting. If not enough space is available, EDQUOT is returned.
2166  * We assume that the requested space is new for all qgroups.
2167  */
2168 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
2169 {
2170         struct btrfs_root *quota_root;
2171         struct btrfs_qgroup *qgroup;
2172         struct btrfs_fs_info *fs_info = root->fs_info;
2173         u64 ref_root = root->root_key.objectid;
2174         int ret = 0;
2175         struct ulist_node *unode;
2176         struct ulist_iterator uiter;
2177
2178         if (!is_fstree(ref_root))
2179                 return 0;
2180
2181         if (num_bytes == 0)
2182                 return 0;
2183
2184         spin_lock(&fs_info->qgroup_lock);
2185         quota_root = fs_info->quota_root;
2186         if (!quota_root)
2187                 goto out;
2188
2189         qgroup = find_qgroup_rb(fs_info, ref_root);
2190         if (!qgroup)
2191                 goto out;
2192
2193         /*
2194          * in a first step, we check all affected qgroups if any limits would
2195          * be exceeded
2196          */
2197         ulist_reinit(fs_info->qgroup_ulist);
2198         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2199                         (uintptr_t)qgroup, GFP_ATOMIC);
2200         if (ret < 0)
2201                 goto out;
2202         ULIST_ITER_INIT(&uiter);
2203         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2204                 struct btrfs_qgroup *qg;
2205                 struct btrfs_qgroup_list *glist;
2206
2207                 qg = u64_to_ptr(unode->aux);
2208
2209                 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
2210                     qg->reserved + (s64)qg->rfer + num_bytes >
2211                     qg->max_rfer) {
2212                         ret = -EDQUOT;
2213                         goto out;
2214                 }
2215
2216                 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
2217                     qg->reserved + (s64)qg->excl + num_bytes >
2218                     qg->max_excl) {
2219                         ret = -EDQUOT;
2220                         goto out;
2221                 }
2222
2223                 list_for_each_entry(glist, &qg->groups, next_group) {
2224                         ret = ulist_add(fs_info->qgroup_ulist,
2225                                         glist->group->qgroupid,
2226                                         (uintptr_t)glist->group, GFP_ATOMIC);
2227                         if (ret < 0)
2228                                 goto out;
2229                 }
2230         }
2231         ret = 0;
2232         /*
2233          * no limits exceeded, now record the reservation into all qgroups
2234          */
2235         ULIST_ITER_INIT(&uiter);
2236         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2237                 struct btrfs_qgroup *qg;
2238
2239                 qg = u64_to_ptr(unode->aux);
2240
2241                 qg->reserved += num_bytes;
2242         }
2243
2244 out:
2245         spin_unlock(&fs_info->qgroup_lock);
2246         return ret;
2247 }
2248
2249 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
2250 {
2251         struct btrfs_root *quota_root;
2252         struct btrfs_qgroup *qgroup;
2253         struct btrfs_fs_info *fs_info = root->fs_info;
2254         struct ulist_node *unode;
2255         struct ulist_iterator uiter;
2256         u64 ref_root = root->root_key.objectid;
2257         int ret = 0;
2258
2259         if (!is_fstree(ref_root))
2260                 return;
2261
2262         if (num_bytes == 0)
2263                 return;
2264
2265         spin_lock(&fs_info->qgroup_lock);
2266
2267         quota_root = fs_info->quota_root;
2268         if (!quota_root)
2269                 goto out;
2270
2271         qgroup = find_qgroup_rb(fs_info, ref_root);
2272         if (!qgroup)
2273                 goto out;
2274
2275         ulist_reinit(fs_info->qgroup_ulist);
2276         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2277                         (uintptr_t)qgroup, GFP_ATOMIC);
2278         if (ret < 0)
2279                 goto out;
2280         ULIST_ITER_INIT(&uiter);
2281         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2282                 struct btrfs_qgroup *qg;
2283                 struct btrfs_qgroup_list *glist;
2284
2285                 qg = u64_to_ptr(unode->aux);
2286
2287                 qg->reserved -= num_bytes;
2288
2289                 list_for_each_entry(glist, &qg->groups, next_group) {
2290                         ret = ulist_add(fs_info->qgroup_ulist,
2291                                         glist->group->qgroupid,
2292                                         (uintptr_t)glist->group, GFP_ATOMIC);
2293                         if (ret < 0)
2294                                 goto out;
2295                 }
2296         }
2297
2298 out:
2299         spin_unlock(&fs_info->qgroup_lock);
2300 }
2301
2302 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
2303 {
2304         if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
2305                 return;
2306         btrfs_err(trans->root->fs_info,
2307                 "qgroups not uptodate in trans handle %p:  list is%s empty, "
2308                 "seq is %#x.%x",
2309                 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
2310                 (u32)(trans->delayed_ref_elem.seq >> 32),
2311                 (u32)trans->delayed_ref_elem.seq);
2312         BUG();
2313 }
2314
2315 /*
2316  * returns < 0 on error, 0 when more leafs are to be scanned.
2317  * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
2318  */
2319 static int
2320 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2321                    struct btrfs_trans_handle *trans, struct ulist *qgroups,
2322                    struct ulist *tmp, struct extent_buffer *scratch_leaf)
2323 {
2324         struct btrfs_key found;
2325         struct ulist *roots = NULL;
2326         struct seq_list tree_mod_seq_elem = {};
2327         u64 num_bytes;
2328         u64 seq;
2329         int new_roots;
2330         int slot;
2331         int ret;
2332
2333         path->leave_spinning = 1;
2334         mutex_lock(&fs_info->qgroup_rescan_lock);
2335         ret = btrfs_search_slot_for_read(fs_info->extent_root,
2336                                          &fs_info->qgroup_rescan_progress,
2337                                          path, 1, 0);
2338
2339         pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
2340                  fs_info->qgroup_rescan_progress.objectid,
2341                  fs_info->qgroup_rescan_progress.type,
2342                  fs_info->qgroup_rescan_progress.offset, ret);
2343
2344         if (ret) {
2345                 /*
2346                  * The rescan is about to end, we will not be scanning any
2347                  * further blocks. We cannot unset the RESCAN flag here, because
2348                  * we want to commit the transaction if everything went well.
2349                  * To make the live accounting work in this phase, we set our
2350                  * scan progress pointer such that every real extent objectid
2351                  * will be smaller.
2352                  */
2353                 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
2354                 btrfs_release_path(path);
2355                 mutex_unlock(&fs_info->qgroup_rescan_lock);
2356                 return ret;
2357         }
2358
2359         btrfs_item_key_to_cpu(path->nodes[0], &found,
2360                               btrfs_header_nritems(path->nodes[0]) - 1);
2361         fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
2362
2363         btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2364         memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
2365         slot = path->slots[0];
2366         btrfs_release_path(path);
2367         mutex_unlock(&fs_info->qgroup_rescan_lock);
2368
2369         for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
2370                 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
2371                 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
2372                     found.type != BTRFS_METADATA_ITEM_KEY)
2373                         continue;
2374                 if (found.type == BTRFS_METADATA_ITEM_KEY)
2375                         num_bytes = fs_info->extent_root->leafsize;
2376                 else
2377                         num_bytes = found.offset;
2378
2379                 ulist_reinit(qgroups);
2380                 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
2381                                            &roots);
2382                 if (ret < 0)
2383                         goto out;
2384                 spin_lock(&fs_info->qgroup_lock);
2385                 seq = fs_info->qgroup_seq;
2386                 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
2387
2388                 new_roots = 0;
2389                 ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
2390                                              seq, &new_roots, 1);
2391                 if (ret < 0) {
2392                         spin_unlock(&fs_info->qgroup_lock);
2393                         ulist_free(roots);
2394                         goto out;
2395                 }
2396
2397                 ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
2398                                              seq, 0, new_roots, 1);
2399                 if (ret < 0) {
2400                         spin_unlock(&fs_info->qgroup_lock);
2401                         ulist_free(roots);
2402                         goto out;
2403                 }
2404                 spin_unlock(&fs_info->qgroup_lock);
2405                 ulist_free(roots);
2406         }
2407 out:
2408         btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2409
2410         return ret;
2411 }
2412
2413 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2414 {
2415         struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
2416                                                      qgroup_rescan_work);
2417         struct btrfs_path *path;
2418         struct btrfs_trans_handle *trans = NULL;
2419         struct ulist *tmp = NULL, *qgroups = NULL;
2420         struct extent_buffer *scratch_leaf = NULL;
2421         int err = -ENOMEM;
2422
2423         path = btrfs_alloc_path();
2424         if (!path)
2425                 goto out;
2426         qgroups = ulist_alloc(GFP_NOFS);
2427         if (!qgroups)
2428                 goto out;
2429         tmp = ulist_alloc(GFP_NOFS);
2430         if (!tmp)
2431                 goto out;
2432         scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2433         if (!scratch_leaf)
2434                 goto out;
2435
2436         err = 0;
2437         while (!err) {
2438                 trans = btrfs_start_transaction(fs_info->fs_root, 0);
2439                 if (IS_ERR(trans)) {
2440                         err = PTR_ERR(trans);
2441                         break;
2442                 }
2443                 if (!fs_info->quota_enabled) {
2444                         err = -EINTR;
2445                 } else {
2446                         err = qgroup_rescan_leaf(fs_info, path, trans,
2447                                                  qgroups, tmp, scratch_leaf);
2448                 }
2449                 if (err > 0)
2450                         btrfs_commit_transaction(trans, fs_info->fs_root);
2451                 else
2452                         btrfs_end_transaction(trans, fs_info->fs_root);
2453         }
2454
2455 out:
2456         kfree(scratch_leaf);
2457         ulist_free(qgroups);
2458         btrfs_free_path(path);
2459
2460         mutex_lock(&fs_info->qgroup_rescan_lock);
2461         fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2462
2463         if (err == 2 &&
2464             fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2465                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2466         } else if (err < 0) {
2467                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2468         }
2469         mutex_unlock(&fs_info->qgroup_rescan_lock);
2470
2471         if (err >= 0) {
2472                 btrfs_info(fs_info, "qgroup scan completed%s",
2473                         err == 2 ? " (inconsistency flag cleared)" : "");
2474         } else {
2475                 btrfs_err(fs_info, "qgroup scan failed with %d", err);
2476         }
2477
2478         complete_all(&fs_info->qgroup_rescan_completion);
2479 }
2480
2481 /*
2482  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2483  * memory required for the rescan context.
2484  */
2485 static int
2486 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2487                    int init_flags)
2488 {
2489         int ret = 0;
2490
2491         if (!init_flags &&
2492             (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2493              !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2494                 ret = -EINVAL;
2495                 goto err;
2496         }
2497
2498         mutex_lock(&fs_info->qgroup_rescan_lock);
2499         spin_lock(&fs_info->qgroup_lock);
2500
2501         if (init_flags) {
2502                 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2503                         ret = -EINPROGRESS;
2504                 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2505                         ret = -EINVAL;
2506
2507                 if (ret) {
2508                         spin_unlock(&fs_info->qgroup_lock);
2509                         mutex_unlock(&fs_info->qgroup_rescan_lock);
2510                         goto err;
2511                 }
2512
2513                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2514         }
2515
2516         memset(&fs_info->qgroup_rescan_progress, 0,
2517                 sizeof(fs_info->qgroup_rescan_progress));
2518         fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2519
2520         spin_unlock(&fs_info->qgroup_lock);
2521         mutex_unlock(&fs_info->qgroup_rescan_lock);
2522
2523         init_completion(&fs_info->qgroup_rescan_completion);
2524
2525         memset(&fs_info->qgroup_rescan_work, 0,
2526                sizeof(fs_info->qgroup_rescan_work));
2527         btrfs_init_work(&fs_info->qgroup_rescan_work,
2528                         btrfs_qgroup_rescan_worker, NULL, NULL);
2529
2530         if (ret) {
2531 err:
2532                 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2533                 return ret;
2534         }
2535
2536         return 0;
2537 }
2538
2539 static void
2540 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2541 {
2542         struct rb_node *n;
2543         struct btrfs_qgroup *qgroup;
2544
2545         spin_lock(&fs_info->qgroup_lock);
2546         /* clear all current qgroup tracking information */
2547         for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2548                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
2549                 qgroup->rfer = 0;
2550                 qgroup->rfer_cmpr = 0;
2551                 qgroup->excl = 0;
2552                 qgroup->excl_cmpr = 0;
2553         }
2554         spin_unlock(&fs_info->qgroup_lock);
2555 }
2556
2557 int
2558 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2559 {
2560         int ret = 0;
2561         struct btrfs_trans_handle *trans;
2562
2563         ret = qgroup_rescan_init(fs_info, 0, 1);
2564         if (ret)
2565                 return ret;
2566
2567         /*
2568          * We have set the rescan_progress to 0, which means no more
2569          * delayed refs will be accounted by btrfs_qgroup_account_ref.
2570          * However, btrfs_qgroup_account_ref may be right after its call
2571          * to btrfs_find_all_roots, in which case it would still do the
2572          * accounting.
2573          * To solve this, we're committing the transaction, which will
2574          * ensure we run all delayed refs and only after that, we are
2575          * going to clear all tracking information for a clean start.
2576          */
2577
2578         trans = btrfs_join_transaction(fs_info->fs_root);
2579         if (IS_ERR(trans)) {
2580                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2581                 return PTR_ERR(trans);
2582         }
2583         ret = btrfs_commit_transaction(trans, fs_info->fs_root);
2584         if (ret) {
2585                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2586                 return ret;
2587         }
2588
2589         qgroup_rescan_zero_tracking(fs_info);
2590
2591         btrfs_queue_work(fs_info->qgroup_rescan_workers,
2592                          &fs_info->qgroup_rescan_work);
2593
2594         return 0;
2595 }
2596
2597 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2598 {
2599         int running;
2600         int ret = 0;
2601
2602         mutex_lock(&fs_info->qgroup_rescan_lock);
2603         spin_lock(&fs_info->qgroup_lock);
2604         running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2605         spin_unlock(&fs_info->qgroup_lock);
2606         mutex_unlock(&fs_info->qgroup_rescan_lock);
2607
2608         if (running)
2609                 ret = wait_for_completion_interruptible(
2610                                         &fs_info->qgroup_rescan_completion);
2611
2612         return ret;
2613 }
2614
2615 /*
2616  * this is only called from open_ctree where we're still single threaded, thus
2617  * locking is omitted here.
2618  */
2619 void
2620 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2621 {
2622         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2623                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
2624                                  &fs_info->qgroup_rescan_work);
2625 }