btrfs: replace path->slots[0] with otherwise unused variable 'slot'
[linux-2.6-block.git] / fs / btrfs / qgroup.c
CommitLineData
bed92eae
AJ
1/*
2 * Copyright (C) 2011 STRATO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
55e301fd 26#include <linux/btrfs.h>
bed92eae
AJ
27
28#include "ctree.h"
29#include "transaction.h"
30#include "disk-io.h"
31#include "locking.h"
32#include "ulist.h"
bed92eae 33#include "backref.h"
2f232036 34#include "extent_io.h"
bed92eae
AJ
35
36/* TODO XXX FIXME
37 * - subvol delete -> delete when ref goes to 0? delete limits also?
38 * - reorganize keys
39 * - compressed
40 * - sync
bed92eae
AJ
41 * - copy also limits on subvol creation
42 * - limit
43 * - caches for ulists
44 * - performance benchmarks
45 * - check all ioctl parameters
46 */
47
/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
 *
 * Instances are allocated zeroed by add_qgroup_rb() and released by
 * __del_qgroup_rb().
 */
struct btrfs_qgroup {
	u64 qgroupid;	/* key used to order the qgroup in qgroup_tree */

	/*
	 * state (filled from the on-disk btrfs_qgroup_info_item)
	 */
	u64 rfer;	/* referenced */
	u64 rfer_cmpr;	/* referenced compressed */
	u64 excl;	/* exclusive */
	u64 excl_cmpr;	/* exclusive compressed */

	/*
	 * limits (filled from the on-disk btrfs_qgroup_limit_item)
	 */
	u64 lim_flags;	/* which limits are set */
	u64 max_rfer;
	u64 max_excl;
	u64 rsv_rfer;
	u64 rsv_excl;

	/*
	 * reservation tracking
	 */
	u64 reserved;

	/*
	 * lists
	 */
	struct list_head groups;  /* groups this group is member of */
	struct list_head members; /* groups that are members of this group */
	struct list_head dirty;   /* dirty groups */
	struct rb_node node;	  /* tree of qgroups */

	/*
	 * temp variables for accounting operations
	 */
	u64 tag;
	u64 refcnt;
};
90
/*
 * glue structure to represent the relations between qgroups.
 *
 * One instance is allocated per (member, group) pair by add_relation_rb()
 * and is linked into both qgroups at the same time.
 */
struct btrfs_qgroup_list {
	struct list_head next_group;	/* entry in member->groups */
	struct list_head next_member;	/* entry in group->members */
	struct btrfs_qgroup *group;	/* the containing (parent) qgroup */
	struct btrfs_qgroup *member;	/* the contained (child) qgroup */
};
100
b382a324
JS
101static int
102qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
103 int init_flags);
104static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
2f232036 105
58400fce 106/* must be called with qgroup_ioctl_lock held */
bed92eae
AJ
107static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
108 u64 qgroupid)
109{
110 struct rb_node *n = fs_info->qgroup_tree.rb_node;
111 struct btrfs_qgroup *qgroup;
112
113 while (n) {
114 qgroup = rb_entry(n, struct btrfs_qgroup, node);
115 if (qgroup->qgroupid < qgroupid)
116 n = n->rb_left;
117 else if (qgroup->qgroupid > qgroupid)
118 n = n->rb_right;
119 else
120 return qgroup;
121 }
122 return NULL;
123}
124
125/* must be called with qgroup_lock held */
126static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
127 u64 qgroupid)
128{
129 struct rb_node **p = &fs_info->qgroup_tree.rb_node;
130 struct rb_node *parent = NULL;
131 struct btrfs_qgroup *qgroup;
132
133 while (*p) {
134 parent = *p;
135 qgroup = rb_entry(parent, struct btrfs_qgroup, node);
136
137 if (qgroup->qgroupid < qgroupid)
138 p = &(*p)->rb_left;
139 else if (qgroup->qgroupid > qgroupid)
140 p = &(*p)->rb_right;
141 else
142 return qgroup;
143 }
144
145 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
146 if (!qgroup)
147 return ERR_PTR(-ENOMEM);
148
149 qgroup->qgroupid = qgroupid;
150 INIT_LIST_HEAD(&qgroup->groups);
151 INIT_LIST_HEAD(&qgroup->members);
152 INIT_LIST_HEAD(&qgroup->dirty);
153
154 rb_link_node(&qgroup->node, parent, p);
155 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
156
157 return qgroup;
158}
159
4082bd3d 160static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
bed92eae 161{
bed92eae
AJ
162 struct btrfs_qgroup_list *list;
163
bed92eae 164 list_del(&qgroup->dirty);
bed92eae
AJ
165 while (!list_empty(&qgroup->groups)) {
166 list = list_first_entry(&qgroup->groups,
167 struct btrfs_qgroup_list, next_group);
168 list_del(&list->next_group);
169 list_del(&list->next_member);
170 kfree(list);
171 }
172
173 while (!list_empty(&qgroup->members)) {
174 list = list_first_entry(&qgroup->members,
175 struct btrfs_qgroup_list, next_member);
176 list_del(&list->next_group);
177 list_del(&list->next_member);
178 kfree(list);
179 }
180 kfree(qgroup);
4082bd3d 181}
bed92eae 182
4082bd3d
WS
183/* must be called with qgroup_lock held */
184static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
185{
186 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
187
188 if (!qgroup)
189 return -ENOENT;
190
191 rb_erase(&qgroup->node, &fs_info->qgroup_tree);
192 __del_qgroup_rb(qgroup);
bed92eae
AJ
193 return 0;
194}
195
196/* must be called with qgroup_lock held */
197static int add_relation_rb(struct btrfs_fs_info *fs_info,
198 u64 memberid, u64 parentid)
199{
200 struct btrfs_qgroup *member;
201 struct btrfs_qgroup *parent;
202 struct btrfs_qgroup_list *list;
203
204 member = find_qgroup_rb(fs_info, memberid);
205 parent = find_qgroup_rb(fs_info, parentid);
206 if (!member || !parent)
207 return -ENOENT;
208
209 list = kzalloc(sizeof(*list), GFP_ATOMIC);
210 if (!list)
211 return -ENOMEM;
212
213 list->group = parent;
214 list->member = member;
215 list_add_tail(&list->next_group, &member->groups);
216 list_add_tail(&list->next_member, &parent->members);
217
218 return 0;
219}
220
221/* must be called with qgroup_lock held */
222static int del_relation_rb(struct btrfs_fs_info *fs_info,
223 u64 memberid, u64 parentid)
224{
225 struct btrfs_qgroup *member;
226 struct btrfs_qgroup *parent;
227 struct btrfs_qgroup_list *list;
228
229 member = find_qgroup_rb(fs_info, memberid);
230 parent = find_qgroup_rb(fs_info, parentid);
231 if (!member || !parent)
232 return -ENOENT;
233
234 list_for_each_entry(list, &member->groups, next_group) {
235 if (list->group == parent) {
236 list_del(&list->next_group);
237 list_del(&list->next_member);
238 kfree(list);
239 return 0;
240 }
241 }
242 return -ENOENT;
243}
244
/*
 * The full config is read in one go, only called from open_ctree()
 * It doesn't use any locking, as at this point we're still single-threaded
 *
 * Builds the in-memory qgroup tree from the items stored in
 * fs_info->quota_root in two passes: first the status item plus all info
 * and limit items, then the relation items.
 *
 * Returns 0 on success (including when quota is not enabled) or a negative
 * errno.  On a negative result the qgroup ulist is freed again and the
 * RESCAN flag is cleared from fs_info->qgroup_flags.
 */
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_path *path = NULL;
	struct extent_buffer *l;
	int slot;
	int ret = 0;
	u64 flags = 0;
	u64 rescan_progress = 0;

	if (!fs_info->quota_enabled)
		return 0;

	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* default this to quota off, in case no status key is found */
	fs_info->qgroup_flags = 0;

	/*
	 * pass 1: read status, all qgroup infos and limits
	 */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
	if (ret)
		goto out;

	while (1) {
		struct btrfs_qgroup *qgroup;

		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
			struct btrfs_qgroup_status_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_status_item);

			/*
			 * ret is still 0 here, so bailing out leaves
			 * qgroup_flags at 0 and quota gets switched off at
			 * the out label without reporting an error.
			 */
			if (btrfs_qgroup_status_version(l, ptr) !=
			    BTRFS_QGROUP_STATUS_VERSION) {
				printk(KERN_ERR
				 "btrfs: old qgroup version, quota disabled\n");
				goto out;
			}
			if (btrfs_qgroup_status_generation(l, ptr) !=
			    fs_info->generation) {
				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				printk(KERN_ERR
					"btrfs: qgroup generation mismatch, "
					"marked as inconsistent\n");
			}
			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
									  ptr);
			rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
			goto next1;
		}

		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
			goto next1;

		/*
		 * An info item is expected to be the first item seen for a
		 * qgroup and a limit item to follow an info item; anything
		 * else is flagged as inconsistent but still loaded.
		 */
		qgroup = find_qgroup_rb(fs_info, found_key.offset);
		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
			printk(KERN_ERR "btrfs: inconsitent qgroup config\n");
			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		}
		if (!qgroup) {
			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out;
			}
		}
		switch (found_key.type) {
		case BTRFS_QGROUP_INFO_KEY: {
			struct btrfs_qgroup_info_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_info_item);
			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
			/* generation currently unused */
			break;
		}
		case BTRFS_QGROUP_LIMIT_KEY: {
			struct btrfs_qgroup_limit_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_limit_item);
			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
			break;
		}
		}
next1:
		/* a positive return ends the pass, a negative one is an error */
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
	btrfs_release_path(path);

	/*
	 * pass 2: read all qgroup relations
	 */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
	if (ret)
		goto out;
	while (1) {
		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
			goto next2;

		if (found_key.objectid > found_key.offset) {
			/* parent <- member, not needed to build config */
			/* FIXME should we omit the key completely? */
			goto next2;
		}

		ret = add_relation_rb(fs_info, found_key.objectid,
				      found_key.offset);
		if (ret == -ENOENT) {
			printk(KERN_WARNING
				"btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
				found_key.objectid, found_key.offset);
			ret = 0;	/* ignore the error */
		}
		if (ret)
			goto out;
next2:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
out:
	/*
	 * Reached on success and on every failure: merge the flags collected
	 * above, then either switch quota off (status flag ON not set) or
	 * set up a pending rescan when the RESCAN flag is set and nothing
	 * has failed so far.
	 */
	fs_info->qgroup_flags |= flags;
	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
		fs_info->quota_enabled = 0;
		fs_info->pending_quota_state = 0;
	} else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
		   ret >= 0) {
		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
	}
	btrfs_free_path(path);

	if (ret < 0) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	/* positive values only signal "no (more) items" and are not errors */
	return ret < 0 ? ret : 0;
}
432
433/*
e685da14
WS
434 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
435 * first two are in single-threaded paths.And for the third one, we have set
436 * quota_root to be null with qgroup_lock held before, so it is safe to clean
437 * up the in-memory structures without qgroup_lock held.
bed92eae
AJ
438 */
439void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
440{
441 struct rb_node *n;
442 struct btrfs_qgroup *qgroup;
bed92eae
AJ
443
444 while ((n = rb_first(&fs_info->qgroup_tree))) {
445 qgroup = rb_entry(n, struct btrfs_qgroup, node);
446 rb_erase(n, &fs_info->qgroup_tree);
4082bd3d 447 __del_qgroup_rb(qgroup);
bed92eae 448 }
1e7bac1e
WS
449 /*
450 * we call btrfs_free_qgroup_config() when umounting
451 * filesystem and disabling quota, so we set qgroup_ulit
452 * to be null here to avoid double free.
453 */
1e8f9158 454 ulist_free(fs_info->qgroup_ulist);
1e7bac1e 455 fs_info->qgroup_ulist = NULL;
bed92eae
AJ
456}
457
458static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
459 struct btrfs_root *quota_root,
460 u64 src, u64 dst)
461{
462 int ret;
463 struct btrfs_path *path;
464 struct btrfs_key key;
465
466 path = btrfs_alloc_path();
467 if (!path)
468 return -ENOMEM;
469
470 key.objectid = src;
471 key.type = BTRFS_QGROUP_RELATION_KEY;
472 key.offset = dst;
473
474 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
475
476 btrfs_mark_buffer_dirty(path->nodes[0]);
477
478 btrfs_free_path(path);
479 return ret;
480}
481
482static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
483 struct btrfs_root *quota_root,
484 u64 src, u64 dst)
485{
486 int ret;
487 struct btrfs_path *path;
488 struct btrfs_key key;
489
490 path = btrfs_alloc_path();
491 if (!path)
492 return -ENOMEM;
493
494 key.objectid = src;
495 key.type = BTRFS_QGROUP_RELATION_KEY;
496 key.offset = dst;
497
498 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
499 if (ret < 0)
500 goto out;
501
502 if (ret > 0) {
503 ret = -ENOENT;
504 goto out;
505 }
506
507 ret = btrfs_del_item(trans, quota_root, path);
508out:
509 btrfs_free_path(path);
510 return ret;
511}
512
513static int add_qgroup_item(struct btrfs_trans_handle *trans,
514 struct btrfs_root *quota_root, u64 qgroupid)
515{
516 int ret;
517 struct btrfs_path *path;
518 struct btrfs_qgroup_info_item *qgroup_info;
519 struct btrfs_qgroup_limit_item *qgroup_limit;
520 struct extent_buffer *leaf;
521 struct btrfs_key key;
522
523 path = btrfs_alloc_path();
524 if (!path)
525 return -ENOMEM;
526
527 key.objectid = 0;
528 key.type = BTRFS_QGROUP_INFO_KEY;
529 key.offset = qgroupid;
530
531 ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
532 sizeof(*qgroup_info));
533 if (ret)
534 goto out;
535
536 leaf = path->nodes[0];
537 qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
538 struct btrfs_qgroup_info_item);
539 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
540 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
541 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
542 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
543 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
544
545 btrfs_mark_buffer_dirty(leaf);
546
547 btrfs_release_path(path);
548
549 key.type = BTRFS_QGROUP_LIMIT_KEY;
550 ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
551 sizeof(*qgroup_limit));
552 if (ret)
553 goto out;
554
555 leaf = path->nodes[0];
556 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
557 struct btrfs_qgroup_limit_item);
558 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
559 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
560 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
561 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
562 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
563
564 btrfs_mark_buffer_dirty(leaf);
565
566 ret = 0;
567out:
568 btrfs_free_path(path);
569 return ret;
570}
571
572static int del_qgroup_item(struct btrfs_trans_handle *trans,
573 struct btrfs_root *quota_root, u64 qgroupid)
574{
575 int ret;
576 struct btrfs_path *path;
577 struct btrfs_key key;
578
579 path = btrfs_alloc_path();
580 if (!path)
581 return -ENOMEM;
582
583 key.objectid = 0;
584 key.type = BTRFS_QGROUP_INFO_KEY;
585 key.offset = qgroupid;
586 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
587 if (ret < 0)
588 goto out;
589
590 if (ret > 0) {
591 ret = -ENOENT;
592 goto out;
593 }
594
595 ret = btrfs_del_item(trans, quota_root, path);
596 if (ret)
597 goto out;
598
599 btrfs_release_path(path);
600
601 key.type = BTRFS_QGROUP_LIMIT_KEY;
602 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
603 if (ret < 0)
604 goto out;
605
606 if (ret > 0) {
607 ret = -ENOENT;
608 goto out;
609 }
610
611 ret = btrfs_del_item(trans, quota_root, path);
612
613out:
614 btrfs_free_path(path);
615 return ret;
616}
617
618static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
619 struct btrfs_root *root, u64 qgroupid,
620 u64 flags, u64 max_rfer, u64 max_excl,
621 u64 rsv_rfer, u64 rsv_excl)
622{
623 struct btrfs_path *path;
624 struct btrfs_key key;
625 struct extent_buffer *l;
626 struct btrfs_qgroup_limit_item *qgroup_limit;
627 int ret;
628 int slot;
629
630 key.objectid = 0;
631 key.type = BTRFS_QGROUP_LIMIT_KEY;
632 key.offset = qgroupid;
633
634 path = btrfs_alloc_path();
84cbe2f7
WS
635 if (!path)
636 return -ENOMEM;
637
bed92eae
AJ
638 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
639 if (ret > 0)
640 ret = -ENOENT;
641
642 if (ret)
643 goto out;
644
645 l = path->nodes[0];
646 slot = path->slots[0];
647 qgroup_limit = btrfs_item_ptr(l, path->slots[0],
648 struct btrfs_qgroup_limit_item);
649 btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
650 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
651 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
652 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
653 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
654
655 btrfs_mark_buffer_dirty(l);
656
657out:
658 btrfs_free_path(path);
659 return ret;
660}
661
662static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
663 struct btrfs_root *root,
664 struct btrfs_qgroup *qgroup)
665{
666 struct btrfs_path *path;
667 struct btrfs_key key;
668 struct extent_buffer *l;
669 struct btrfs_qgroup_info_item *qgroup_info;
670 int ret;
671 int slot;
672
673 key.objectid = 0;
674 key.type = BTRFS_QGROUP_INFO_KEY;
675 key.offset = qgroup->qgroupid;
676
677 path = btrfs_alloc_path();
84cbe2f7
WS
678 if (!path)
679 return -ENOMEM;
680
bed92eae
AJ
681 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
682 if (ret > 0)
683 ret = -ENOENT;
684
685 if (ret)
686 goto out;
687
688 l = path->nodes[0];
689 slot = path->slots[0];
690 qgroup_info = btrfs_item_ptr(l, path->slots[0],
691 struct btrfs_qgroup_info_item);
692 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
693 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
694 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
695 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
696 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
697
698 btrfs_mark_buffer_dirty(l);
699
700out:
701 btrfs_free_path(path);
702 return ret;
703}
704
705static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
706 struct btrfs_fs_info *fs_info,
707 struct btrfs_root *root)
708{
709 struct btrfs_path *path;
710 struct btrfs_key key;
711 struct extent_buffer *l;
712 struct btrfs_qgroup_status_item *ptr;
713 int ret;
714 int slot;
715
716 key.objectid = 0;
717 key.type = BTRFS_QGROUP_STATUS_KEY;
718 key.offset = 0;
719
720 path = btrfs_alloc_path();
84cbe2f7
WS
721 if (!path)
722 return -ENOMEM;
723
bed92eae
AJ
724 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
725 if (ret > 0)
726 ret = -ENOENT;
727
728 if (ret)
729 goto out;
730
731 l = path->nodes[0];
732 slot = path->slots[0];
733 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
734 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
735 btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
2f232036
JS
736 btrfs_set_qgroup_status_rescan(l, ptr,
737 fs_info->qgroup_rescan_progress.objectid);
bed92eae
AJ
738
739 btrfs_mark_buffer_dirty(l);
740
741out:
742 btrfs_free_path(path);
743 return ret;
744}
745
746/*
747 * called with qgroup_lock held
748 */
749static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
750 struct btrfs_root *root)
751{
752 struct btrfs_path *path;
753 struct btrfs_key key;
06b3a860 754 struct extent_buffer *leaf = NULL;
bed92eae 755 int ret;
06b3a860 756 int nr = 0;
bed92eae 757
bed92eae
AJ
758 path = btrfs_alloc_path();
759 if (!path)
760 return -ENOMEM;
761
06b3a860
WS
762 path->leave_spinning = 1;
763
764 key.objectid = 0;
765 key.offset = 0;
766 key.type = 0;
bed92eae 767
06b3a860 768 while (1) {
bed92eae 769 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
06b3a860
WS
770 if (ret < 0)
771 goto out;
772 leaf = path->nodes[0];
773 nr = btrfs_header_nritems(leaf);
774 if (!nr)
bed92eae 775 break;
06b3a860
WS
776 /*
777 * delete the leaf one by one
778 * since the whole tree is going
779 * to be deleted.
780 */
781 path->slots[0] = 0;
782 ret = btrfs_del_items(trans, root, path, 0, nr);
bed92eae
AJ
783 if (ret)
784 goto out;
06b3a860 785
bed92eae
AJ
786 btrfs_release_path(path);
787 }
788 ret = 0;
789out:
790 root->fs_info->pending_quota_state = 0;
791 btrfs_free_path(path);
792 return ret;
793}
794
/*
 * Enable quota: create the quota tree, write its status item and create a
 * qgroup (on disk and in memory) for every BTRFS_ROOT_REF_KEY found in the
 * tree root plus one for BTRFS_FS_TREE_OBJECTID.
 *
 * If fs_info->quota_root is already set, only pending_quota_state is set
 * to 1 and 0 is returned.
 *
 * Runs with qgroup_ioctl_lock held for its whole duration.  Returns 0 on
 * success or a negative errno; on failure the qgroup ulist is freed again
 * and, once the quota root was created, its extent buffers and the root
 * structure are released as well.
 */
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path = NULL;
	struct btrfs_qgroup_status_item *ptr;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_qgroup *qgroup = NULL;
	int ret = 0;
	int slot;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (fs_info->quota_root) {
		fs_info->pending_quota_state = 1;
		goto out;
	}

	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * initially create the quota tree
	 */
	quota_root = btrfs_create_tree(trans, fs_info,
				       BTRFS_QUOTA_TREE_OBJECTID);
	if (IS_ERR(quota_root)) {
		ret = PTR_ERR(quota_root);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out_free_root;
	}

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*ptr));
	if (ret)
		goto out_free_path;

	/* a new quota tree starts out switched on but marked inconsistent */
	leaf = path->nodes[0];
	ptr = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

	btrfs_mark_buffer_dirty(leaf);

	key.objectid = 0;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = 0;

	btrfs_release_path(path);
	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
	/* a positive result skips the walk and goes straight to the fs tree */
	if (ret > 0)
		goto out_add_root;
	if (ret < 0)
		goto out_free_path;


	/* walk the tree root, adding one qgroup per ROOT_REF item offset */
	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.type == BTRFS_ROOT_REF_KEY) {
			ret = add_qgroup_item(trans, quota_root,
					      found_key.offset);
			if (ret)
				goto out_free_path;

			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out_free_path;
			}
		}
		ret = btrfs_next_item(tree_root, path);
		if (ret < 0)
			goto out_free_path;
		if (ret)
			break;
	}

out_add_root:
	btrfs_release_path(path);
	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
	if (ret)
		goto out_free_path;

	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
	if (IS_ERR(qgroup)) {
		ret = PTR_ERR(qgroup);
		goto out_free_path;
	}
	/* publish the new quota root only after everything else succeeded */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_root = quota_root;
	fs_info->pending_quota_state = 1;
	spin_unlock(&fs_info->qgroup_lock);
out_free_path:
	btrfs_free_path(path);
out_free_root:
	/* also reached on success; the root is only released on error */
	if (ret) {
		free_extent_buffer(quota_root->node);
		free_extent_buffer(quota_root->commit_root);
		kfree(quota_root);
	}
out:
	if (ret) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
	}
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
925
/*
 * Disable quota: detach the quota root from fs_info, free all in-memory
 * qgroup state, empty the quota tree and delete the tree itself.
 *
 * Runs with qgroup_ioctl_lock held for its whole duration.  Returns 0 on
 * success (also when there is no quota root) or the error from
 * btrfs_clean_quota_tree() / btrfs_del_root().
 *
 * NOTE(review): on those two error paths quota_root has already been
 * detached from fs_info and is not freed here - confirm whether another
 * path releases it.
 */
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *quota_root;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root)
		goto out;
	/* clear quota_root under qgroup_lock before tearing anything down */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_enabled = 0;
	fs_info->pending_quota_state = 0;
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	spin_unlock(&fs_info->qgroup_lock);

	btrfs_free_qgroup_config(fs_info);

	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	btrfs_tree_lock(quota_root->node);
	clean_tree_block(trans, tree_root, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
967
2f232036
JS
968static void qgroup_dirty(struct btrfs_fs_info *fs_info,
969 struct btrfs_qgroup *qgroup)
bed92eae 970{
2f232036
JS
971 if (list_empty(&qgroup->dirty))
972 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
bed92eae
AJ
973}
974
975int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
976 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
977{
978 struct btrfs_root *quota_root;
b7fef4f5
WS
979 struct btrfs_qgroup *parent;
980 struct btrfs_qgroup *member;
534e6623 981 struct btrfs_qgroup_list *list;
bed92eae
AJ
982 int ret = 0;
983
f2f6ed3d 984 mutex_lock(&fs_info->qgroup_ioctl_lock);
bed92eae 985 quota_root = fs_info->quota_root;
f2f6ed3d
WS
986 if (!quota_root) {
987 ret = -EINVAL;
988 goto out;
989 }
b7fef4f5
WS
990 member = find_qgroup_rb(fs_info, src);
991 parent = find_qgroup_rb(fs_info, dst);
992 if (!member || !parent) {
993 ret = -EINVAL;
994 goto out;
995 }
bed92eae 996
534e6623
WS
997 /* check if such qgroup relation exist firstly */
998 list_for_each_entry(list, &member->groups, next_group) {
999 if (list->group == parent) {
1000 ret = -EEXIST;
1001 goto out;
1002 }
1003 }
1004
bed92eae
AJ
1005 ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1006 if (ret)
f2f6ed3d 1007 goto out;
bed92eae
AJ
1008
1009 ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1010 if (ret) {
1011 del_qgroup_relation_item(trans, quota_root, src, dst);
f2f6ed3d 1012 goto out;
bed92eae
AJ
1013 }
1014
1015 spin_lock(&fs_info->qgroup_lock);
1016 ret = add_relation_rb(quota_root->fs_info, src, dst);
1017 spin_unlock(&fs_info->qgroup_lock);
f2f6ed3d
WS
1018out:
1019 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1020 return ret;
1021}
1022
1023int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1024 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1025{
1026 struct btrfs_root *quota_root;
534e6623
WS
1027 struct btrfs_qgroup *parent;
1028 struct btrfs_qgroup *member;
1029 struct btrfs_qgroup_list *list;
bed92eae
AJ
1030 int ret = 0;
1031 int err;
1032
f2f6ed3d 1033 mutex_lock(&fs_info->qgroup_ioctl_lock);
bed92eae 1034 quota_root = fs_info->quota_root;
f2f6ed3d
WS
1035 if (!quota_root) {
1036 ret = -EINVAL;
1037 goto out;
1038 }
bed92eae 1039
534e6623
WS
1040 member = find_qgroup_rb(fs_info, src);
1041 parent = find_qgroup_rb(fs_info, dst);
1042 if (!member || !parent) {
1043 ret = -EINVAL;
1044 goto out;
1045 }
1046
1047 /* check if such qgroup relation exist firstly */
1048 list_for_each_entry(list, &member->groups, next_group) {
1049 if (list->group == parent)
1050 goto exist;
1051 }
1052 ret = -ENOENT;
1053 goto out;
1054exist:
bed92eae
AJ
1055 ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1056 err = del_qgroup_relation_item(trans, quota_root, dst, src);
1057 if (err && !ret)
1058 ret = err;
1059
1060 spin_lock(&fs_info->qgroup_lock);
1061 del_relation_rb(fs_info, src, dst);
bed92eae 1062 spin_unlock(&fs_info->qgroup_lock);
f2f6ed3d
WS
1063out:
1064 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1065 return ret;
1066}
1067
1068int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1069 struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
1070{
1071 struct btrfs_root *quota_root;
1072 struct btrfs_qgroup *qgroup;
1073 int ret = 0;
1074
f2f6ed3d 1075 mutex_lock(&fs_info->qgroup_ioctl_lock);
bed92eae 1076 quota_root = fs_info->quota_root;
f2f6ed3d
WS
1077 if (!quota_root) {
1078 ret = -EINVAL;
1079 goto out;
1080 }
534e6623
WS
1081 qgroup = find_qgroup_rb(fs_info, qgroupid);
1082 if (qgroup) {
1083 ret = -EEXIST;
1084 goto out;
1085 }
bed92eae
AJ
1086
1087 ret = add_qgroup_item(trans, quota_root, qgroupid);
534e6623
WS
1088 if (ret)
1089 goto out;
bed92eae
AJ
1090
1091 spin_lock(&fs_info->qgroup_lock);
1092 qgroup = add_qgroup_rb(fs_info, qgroupid);
1093 spin_unlock(&fs_info->qgroup_lock);
1094
1095 if (IS_ERR(qgroup))
1096 ret = PTR_ERR(qgroup);
f2f6ed3d
WS
1097out:
1098 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1099 return ret;
1100}
1101
1102int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1103 struct btrfs_fs_info *fs_info, u64 qgroupid)
1104{
1105 struct btrfs_root *quota_root;
2cf68703 1106 struct btrfs_qgroup *qgroup;
bed92eae
AJ
1107 int ret = 0;
1108
f2f6ed3d 1109 mutex_lock(&fs_info->qgroup_ioctl_lock);
bed92eae 1110 quota_root = fs_info->quota_root;
f2f6ed3d
WS
1111 if (!quota_root) {
1112 ret = -EINVAL;
1113 goto out;
1114 }
bed92eae 1115
2cf68703 1116 qgroup = find_qgroup_rb(fs_info, qgroupid);
534e6623
WS
1117 if (!qgroup) {
1118 ret = -ENOENT;
1119 goto out;
1120 } else {
1121 /* check if there are no relations to this qgroup */
1122 if (!list_empty(&qgroup->groups) ||
1123 !list_empty(&qgroup->members)) {
f2f6ed3d
WS
1124 ret = -EBUSY;
1125 goto out;
2cf68703
AJ
1126 }
1127 }
bed92eae
AJ
1128 ret = del_qgroup_item(trans, quota_root, qgroupid);
1129
1130 spin_lock(&fs_info->qgroup_lock);
1131 del_qgroup_rb(quota_root->fs_info, qgroupid);
bed92eae 1132 spin_unlock(&fs_info->qgroup_lock);
f2f6ed3d
WS
1133out:
1134 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1135 return ret;
1136}
1137
1138int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1139 struct btrfs_fs_info *fs_info, u64 qgroupid,
1140 struct btrfs_qgroup_limit *limit)
1141{
f2f6ed3d 1142 struct btrfs_root *quota_root;
bed92eae
AJ
1143 struct btrfs_qgroup *qgroup;
1144 int ret = 0;
1145
f2f6ed3d
WS
1146 mutex_lock(&fs_info->qgroup_ioctl_lock);
1147 quota_root = fs_info->quota_root;
1148 if (!quota_root) {
1149 ret = -EINVAL;
1150 goto out;
1151 }
bed92eae 1152
ddb47afa
WS
1153 qgroup = find_qgroup_rb(fs_info, qgroupid);
1154 if (!qgroup) {
1155 ret = -ENOENT;
1156 goto out;
1157 }
bed92eae
AJ
1158 ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
1159 limit->flags, limit->max_rfer,
1160 limit->max_excl, limit->rsv_rfer,
1161 limit->rsv_excl);
1162 if (ret) {
1163 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1164 printk(KERN_INFO "unable to update quota limit for %llu\n",
c1c9ff7c 1165 qgroupid);
bed92eae
AJ
1166 }
1167
58400fce 1168 spin_lock(&fs_info->qgroup_lock);
bed92eae
AJ
1169 qgroup->lim_flags = limit->flags;
1170 qgroup->max_rfer = limit->max_rfer;
1171 qgroup->max_excl = limit->max_excl;
1172 qgroup->rsv_rfer = limit->rsv_rfer;
1173 qgroup->rsv_excl = limit->rsv_excl;
bed92eae 1174 spin_unlock(&fs_info->qgroup_lock);
f2f6ed3d
WS
1175out:
1176 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1177 return ret;
1178}
1179
bed92eae
AJ
1180/*
1181 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
1182 * the modification into a list that's later used by btrfs_end_transaction to
1183 * pass the recorded modifications on to btrfs_qgroup_account_ref.
1184 */
1185int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1186 struct btrfs_delayed_ref_node *node,
1187 struct btrfs_delayed_extent_op *extent_op)
1188{
1189 struct qgroup_update *u;
1190
1191 BUG_ON(!trans->delayed_ref_elem.seq);
1192 u = kmalloc(sizeof(*u), GFP_NOFS);
1193 if (!u)
1194 return -ENOMEM;
1195
1196 u->node = node;
1197 u->extent_op = extent_op;
1198 list_add_tail(&u->list, &trans->qgroup_ref_list);
1199
1200 return 0;
1201}
1202
46b665ce
JS
1203static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
1204 struct ulist *roots, struct ulist *tmp,
1205 u64 seq)
1206{
1207 struct ulist_node *unode;
1208 struct ulist_iterator uiter;
1209 struct ulist_node *tmp_unode;
1210 struct ulist_iterator tmp_uiter;
1211 struct btrfs_qgroup *qg;
1212 int ret;
1213
1214 ULIST_ITER_INIT(&uiter);
1215 while ((unode = ulist_next(roots, &uiter))) {
1216 qg = find_qgroup_rb(fs_info, unode->val);
1217 if (!qg)
1218 continue;
1219
1220 ulist_reinit(tmp);
1221 /* XXX id not needed */
1222 ret = ulist_add(tmp, qg->qgroupid,
1223 (u64)(uintptr_t)qg, GFP_ATOMIC);
1224 if (ret < 0)
1225 return ret;
1226 ULIST_ITER_INIT(&tmp_uiter);
1227 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1228 struct btrfs_qgroup_list *glist;
1229
1230 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1231 if (qg->refcnt < seq)
1232 qg->refcnt = seq + 1;
1233 else
1234 ++qg->refcnt;
1235
1236 list_for_each_entry(glist, &qg->groups, next_group) {
1237 ret = ulist_add(tmp, glist->group->qgroupid,
1238 (u64)(uintptr_t)glist->group,
1239 GFP_ATOMIC);
1240 if (ret < 0)
1241 return ret;
1242 }
1243 }
1244 }
1245
1246 return 0;
1247}
1248
1249static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
1250 struct ulist *roots, struct ulist *tmp,
1251 u64 seq, int sgn, u64 num_bytes,
1252 struct btrfs_qgroup *qgroup)
1253{
1254 struct ulist_node *unode;
1255 struct ulist_iterator uiter;
1256 struct btrfs_qgroup *qg;
1257 struct btrfs_qgroup_list *glist;
1258 int ret;
1259
1260 ulist_reinit(tmp);
1261 ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
1262 if (ret < 0)
1263 return ret;
1264
1265 ULIST_ITER_INIT(&uiter);
1266 while ((unode = ulist_next(tmp, &uiter))) {
1267 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1268 if (qg->refcnt < seq) {
1269 /* not visited by step 1 */
1270 qg->rfer += sgn * num_bytes;
1271 qg->rfer_cmpr += sgn * num_bytes;
1272 if (roots->nnodes == 0) {
1273 qg->excl += sgn * num_bytes;
1274 qg->excl_cmpr += sgn * num_bytes;
1275 }
1276 qgroup_dirty(fs_info, qg);
1277 }
1278 WARN_ON(qg->tag >= seq);
1279 qg->tag = seq;
1280
1281 list_for_each_entry(glist, &qg->groups, next_group) {
1282 ret = ulist_add(tmp, glist->group->qgroupid,
1283 (uintptr_t)glist->group, GFP_ATOMIC);
1284 if (ret < 0)
1285 return ret;
1286 }
1287 }
1288
1289 return 0;
1290}
1291
1292static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
1293 struct ulist *roots, struct ulist *tmp,
1294 u64 seq, int sgn, u64 num_bytes)
1295{
1296 struct ulist_node *unode;
1297 struct ulist_iterator uiter;
1298 struct btrfs_qgroup *qg;
1299 struct ulist_node *tmp_unode;
1300 struct ulist_iterator tmp_uiter;
1301 int ret;
1302
1303 ULIST_ITER_INIT(&uiter);
1304 while ((unode = ulist_next(roots, &uiter))) {
1305 qg = find_qgroup_rb(fs_info, unode->val);
1306 if (!qg)
1307 continue;
1308
1309 ulist_reinit(tmp);
1310 ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
1311 if (ret < 0)
1312 return ret;
1313
1314 ULIST_ITER_INIT(&tmp_uiter);
1315 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1316 struct btrfs_qgroup_list *glist;
1317
1318 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1319 if (qg->tag == seq)
1320 continue;
1321
1322 if (qg->refcnt - seq == roots->nnodes) {
1323 qg->excl -= sgn * num_bytes;
1324 qg->excl_cmpr -= sgn * num_bytes;
1325 qgroup_dirty(fs_info, qg);
1326 }
1327
1328 list_for_each_entry(glist, &qg->groups, next_group) {
1329 ret = ulist_add(tmp, glist->group->qgroupid,
1330 (uintptr_t)glist->group,
1331 GFP_ATOMIC);
1332 if (ret < 0)
1333 return ret;
1334 }
1335 }
1336 }
1337
1338 return 0;
1339}
1340
bed92eae
AJ
1341/*
1342 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1343 * from the fs. First, all roots referencing the extent are searched, and
1344 * then the space is accounted accordingly to the different roots. The
1345 * accounting algorithm works in 3 steps documented inline.
1346 */
1347int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1348 struct btrfs_fs_info *fs_info,
1349 struct btrfs_delayed_ref_node *node,
1350 struct btrfs_delayed_extent_op *extent_op)
1351{
1352 struct btrfs_key ins;
1353 struct btrfs_root *quota_root;
1354 u64 ref_root;
1355 struct btrfs_qgroup *qgroup;
bed92eae 1356 struct ulist *roots = NULL;
bed92eae
AJ
1357 u64 seq;
1358 int ret = 0;
1359 int sgn;
1360
1361 if (!fs_info->quota_enabled)
1362 return 0;
1363
1364 BUG_ON(!fs_info->quota_root);
1365
1366 ins.objectid = node->bytenr;
1367 ins.offset = node->num_bytes;
1368 ins.type = BTRFS_EXTENT_ITEM_KEY;
1369
1370 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1371 node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
1372 struct btrfs_delayed_tree_ref *ref;
1373 ref = btrfs_delayed_node_to_tree_ref(node);
1374 ref_root = ref->root;
1375 } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1376 node->type == BTRFS_SHARED_DATA_REF_KEY) {
1377 struct btrfs_delayed_data_ref *ref;
1378 ref = btrfs_delayed_node_to_data_ref(node);
1379 ref_root = ref->root;
1380 } else {
1381 BUG();
1382 }
1383
1384 if (!is_fstree(ref_root)) {
1385 /*
1386 * non-fs-trees are not being accounted
1387 */
1388 return 0;
1389 }
1390
1391 switch (node->action) {
1392 case BTRFS_ADD_DELAYED_REF:
1393 case BTRFS_ADD_DELAYED_EXTENT:
1394 sgn = 1;
fc36ed7e 1395 seq = btrfs_tree_mod_seq_prev(node->seq);
bed92eae
AJ
1396 break;
1397 case BTRFS_DROP_DELAYED_REF:
1398 sgn = -1;
fc36ed7e 1399 seq = node->seq;
bed92eae
AJ
1400 break;
1401 case BTRFS_UPDATE_DELAYED_HEAD:
1402 return 0;
1403 default:
1404 BUG();
1405 }
1406
2f232036
JS
1407 mutex_lock(&fs_info->qgroup_rescan_lock);
1408 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1409 if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
1410 mutex_unlock(&fs_info->qgroup_rescan_lock);
1411 return 0;
1412 }
1413 }
1414 mutex_unlock(&fs_info->qgroup_rescan_lock);
1415
bed92eae
AJ
1416 /*
1417 * the delayed ref sequence number we pass depends on the direction of
fc36ed7e
JS
1418 * the operation. for add operations, we pass
1419 * tree_mod_log_prev_seq(node->seq) to skip
bed92eae
AJ
1420 * the delayed ref's current sequence number, because we need the state
1421 * of the tree before the add operation. for delete operations, we pass
1422 * (node->seq) to include the delayed ref's current sequence number,
1423 * because we need the state of the tree after the delete operation.
1424 */
fc36ed7e 1425 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
bed92eae 1426 if (ret < 0)
a7975026 1427 return ret;
bed92eae
AJ
1428
1429 spin_lock(&fs_info->qgroup_lock);
2f232036 1430
bed92eae
AJ
1431 quota_root = fs_info->quota_root;
1432 if (!quota_root)
1433 goto unlock;
1434
1435 qgroup = find_qgroup_rb(fs_info, ref_root);
1436 if (!qgroup)
1437 goto unlock;
1438
1439 /*
1440 * step 1: for each old ref, visit all nodes once and inc refcnt
1441 */
1e8f9158 1442 ulist_reinit(fs_info->qgroup_ulist);
bed92eae
AJ
1443 seq = fs_info->qgroup_seq;
1444 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1445
1e8f9158
WS
1446 ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist,
1447 seq);
46b665ce
JS
1448 if (ret)
1449 goto unlock;
bed92eae
AJ
1450
1451 /*
1452 * step 2: walk from the new root
1453 */
1e8f9158
WS
1454 ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist,
1455 seq, sgn, node->num_bytes, qgroup);
46b665ce 1456 if (ret)
3c97185c 1457 goto unlock;
bed92eae
AJ
1458
1459 /*
1460 * step 3: walk again from old refs
1461 */
1e8f9158
WS
1462 ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist,
1463 seq, sgn, node->num_bytes);
46b665ce
JS
1464 if (ret)
1465 goto unlock;
bed92eae 1466
bed92eae
AJ
1467unlock:
1468 spin_unlock(&fs_info->qgroup_lock);
bed92eae 1469 ulist_free(roots);
bed92eae
AJ
1470
1471 return ret;
1472}
1473
1474/*
1475 * called from commit_transaction. Writes all changed qgroups to disk.
1476 */
1477int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
1478 struct btrfs_fs_info *fs_info)
1479{
1480 struct btrfs_root *quota_root = fs_info->quota_root;
1481 int ret = 0;
3d7b5a28 1482 int start_rescan_worker = 0;
bed92eae
AJ
1483
1484 if (!quota_root)
1485 goto out;
1486
3d7b5a28
JS
1487 if (!fs_info->quota_enabled && fs_info->pending_quota_state)
1488 start_rescan_worker = 1;
1489
bed92eae
AJ
1490 fs_info->quota_enabled = fs_info->pending_quota_state;
1491
1492 spin_lock(&fs_info->qgroup_lock);
1493 while (!list_empty(&fs_info->dirty_qgroups)) {
1494 struct btrfs_qgroup *qgroup;
1495 qgroup = list_first_entry(&fs_info->dirty_qgroups,
1496 struct btrfs_qgroup, dirty);
1497 list_del_init(&qgroup->dirty);
1498 spin_unlock(&fs_info->qgroup_lock);
1499 ret = update_qgroup_info_item(trans, quota_root, qgroup);
1500 if (ret)
1501 fs_info->qgroup_flags |=
1502 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1503 spin_lock(&fs_info->qgroup_lock);
1504 }
1505 if (fs_info->quota_enabled)
1506 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
1507 else
1508 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
1509 spin_unlock(&fs_info->qgroup_lock);
1510
1511 ret = update_qgroup_status_item(trans, fs_info, quota_root);
1512 if (ret)
1513 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1514
3d7b5a28 1515 if (!ret && start_rescan_worker) {
b382a324
JS
1516 ret = qgroup_rescan_init(fs_info, 0, 1);
1517 if (!ret) {
1518 qgroup_rescan_zero_tracking(fs_info);
1519 btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
1520 &fs_info->qgroup_rescan_work);
1521 }
3d7b5a28
JS
1522 ret = 0;
1523 }
1524
bed92eae
AJ
1525out:
1526
1527 return ret;
1528}
1529
1530/*
1531 * copy the acounting information between qgroups. This is necessary when a
1532 * snapshot or a subvolume is created
1533 */
1534int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1535 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
1536 struct btrfs_qgroup_inherit *inherit)
1537{
1538 int ret = 0;
1539 int i;
1540 u64 *i_qgroups;
1541 struct btrfs_root *quota_root = fs_info->quota_root;
1542 struct btrfs_qgroup *srcgroup;
1543 struct btrfs_qgroup *dstgroup;
1544 u32 level_size = 0;
3f5e2d3b 1545 u64 nums;
bed92eae 1546
f2f6ed3d 1547 mutex_lock(&fs_info->qgroup_ioctl_lock);
bed92eae 1548 if (!fs_info->quota_enabled)
f2f6ed3d 1549 goto out;
bed92eae 1550
f2f6ed3d
WS
1551 if (!quota_root) {
1552 ret = -EINVAL;
1553 goto out;
1554 }
bed92eae 1555
3f5e2d3b
WS
1556 if (inherit) {
1557 i_qgroups = (u64 *)(inherit + 1);
1558 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
1559 2 * inherit->num_excl_copies;
1560 for (i = 0; i < nums; ++i) {
1561 srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
1562 if (!srcgroup) {
1563 ret = -EINVAL;
1564 goto out;
1565 }
1566 ++i_qgroups;
1567 }
1568 }
1569
bed92eae
AJ
1570 /*
1571 * create a tracking group for the subvol itself
1572 */
1573 ret = add_qgroup_item(trans, quota_root, objectid);
1574 if (ret)
1575 goto out;
1576
1577 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
1578 ret = update_qgroup_limit_item(trans, quota_root, objectid,
1579 inherit->lim.flags,
1580 inherit->lim.max_rfer,
1581 inherit->lim.max_excl,
1582 inherit->lim.rsv_rfer,
1583 inherit->lim.rsv_excl);
1584 if (ret)
1585 goto out;
1586 }
1587
1588 if (srcid) {
1589 struct btrfs_root *srcroot;
1590 struct btrfs_key srckey;
1591 int srcroot_level;
1592
1593 srckey.objectid = srcid;
1594 srckey.type = BTRFS_ROOT_ITEM_KEY;
1595 srckey.offset = (u64)-1;
1596 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
1597 if (IS_ERR(srcroot)) {
1598 ret = PTR_ERR(srcroot);
1599 goto out;
1600 }
1601
1602 rcu_read_lock();
1603 srcroot_level = btrfs_header_level(srcroot->node);
1604 level_size = btrfs_level_size(srcroot, srcroot_level);
1605 rcu_read_unlock();
1606 }
1607
1608 /*
1609 * add qgroup to all inherited groups
1610 */
1611 if (inherit) {
1612 i_qgroups = (u64 *)(inherit + 1);
1613 for (i = 0; i < inherit->num_qgroups; ++i) {
1614 ret = add_qgroup_relation_item(trans, quota_root,
1615 objectid, *i_qgroups);
1616 if (ret)
1617 goto out;
1618 ret = add_qgroup_relation_item(trans, quota_root,
1619 *i_qgroups, objectid);
1620 if (ret)
1621 goto out;
1622 ++i_qgroups;
1623 }
1624 }
1625
1626
1627 spin_lock(&fs_info->qgroup_lock);
1628
1629 dstgroup = add_qgroup_rb(fs_info, objectid);
57a5a882
DC
1630 if (IS_ERR(dstgroup)) {
1631 ret = PTR_ERR(dstgroup);
bed92eae 1632 goto unlock;
57a5a882 1633 }
bed92eae
AJ
1634
1635 if (srcid) {
1636 srcgroup = find_qgroup_rb(fs_info, srcid);
f3a87f1b 1637 if (!srcgroup)
bed92eae
AJ
1638 goto unlock;
1639 dstgroup->rfer = srcgroup->rfer - level_size;
1640 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
1641 srcgroup->excl = level_size;
1642 srcgroup->excl_cmpr = level_size;
1643 qgroup_dirty(fs_info, dstgroup);
1644 qgroup_dirty(fs_info, srcgroup);
1645 }
1646
f3a87f1b 1647 if (!inherit)
bed92eae
AJ
1648 goto unlock;
1649
1650 i_qgroups = (u64 *)(inherit + 1);
1651 for (i = 0; i < inherit->num_qgroups; ++i) {
1652 ret = add_relation_rb(quota_root->fs_info, objectid,
1653 *i_qgroups);
1654 if (ret)
1655 goto unlock;
1656 ++i_qgroups;
1657 }
1658
1659 for (i = 0; i < inherit->num_ref_copies; ++i) {
1660 struct btrfs_qgroup *src;
1661 struct btrfs_qgroup *dst;
1662
1663 src = find_qgroup_rb(fs_info, i_qgroups[0]);
1664 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1665
1666 if (!src || !dst) {
1667 ret = -EINVAL;
1668 goto unlock;
1669 }
1670
1671 dst->rfer = src->rfer - level_size;
1672 dst->rfer_cmpr = src->rfer_cmpr - level_size;
1673 i_qgroups += 2;
1674 }
1675 for (i = 0; i < inherit->num_excl_copies; ++i) {
1676 struct btrfs_qgroup *src;
1677 struct btrfs_qgroup *dst;
1678
1679 src = find_qgroup_rb(fs_info, i_qgroups[0]);
1680 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1681
1682 if (!src || !dst) {
1683 ret = -EINVAL;
1684 goto unlock;
1685 }
1686
1687 dst->excl = src->excl + level_size;
1688 dst->excl_cmpr = src->excl_cmpr + level_size;
1689 i_qgroups += 2;
1690 }
1691
1692unlock:
1693 spin_unlock(&fs_info->qgroup_lock);
1694out:
f2f6ed3d 1695 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1696 return ret;
1697}
1698
1699/*
1700 * reserve some space for a qgroup and all its parents. The reservation takes
1701 * place with start_transaction or dealloc_reserve, similar to ENOSPC
1702 * accounting. If not enough space is available, EDQUOT is returned.
1703 * We assume that the requested space is new for all qgroups.
1704 */
1705int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1706{
1707 struct btrfs_root *quota_root;
1708 struct btrfs_qgroup *qgroup;
1709 struct btrfs_fs_info *fs_info = root->fs_info;
1710 u64 ref_root = root->root_key.objectid;
1711 int ret = 0;
bed92eae
AJ
1712 struct ulist_node *unode;
1713 struct ulist_iterator uiter;
1714
1715 if (!is_fstree(ref_root))
1716 return 0;
1717
1718 if (num_bytes == 0)
1719 return 0;
1720
1721 spin_lock(&fs_info->qgroup_lock);
1722 quota_root = fs_info->quota_root;
1723 if (!quota_root)
1724 goto out;
1725
1726 qgroup = find_qgroup_rb(fs_info, ref_root);
1727 if (!qgroup)
1728 goto out;
1729
1730 /*
1731 * in a first step, we check all affected qgroups if any limits would
1732 * be exceeded
1733 */
1e8f9158
WS
1734 ulist_reinit(fs_info->qgroup_ulist);
1735 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
3c97185c
WS
1736 (uintptr_t)qgroup, GFP_ATOMIC);
1737 if (ret < 0)
1738 goto out;
bed92eae 1739 ULIST_ITER_INIT(&uiter);
1e8f9158 1740 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
bed92eae
AJ
1741 struct btrfs_qgroup *qg;
1742 struct btrfs_qgroup_list *glist;
1743
995e01b7 1744 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
bed92eae
AJ
1745
1746 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
b4fcd6be 1747 qg->reserved + (s64)qg->rfer + num_bytes >
720f1e20 1748 qg->max_rfer) {
bed92eae 1749 ret = -EDQUOT;
720f1e20
WS
1750 goto out;
1751 }
bed92eae
AJ
1752
1753 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
b4fcd6be 1754 qg->reserved + (s64)qg->excl + num_bytes >
720f1e20 1755 qg->max_excl) {
bed92eae 1756 ret = -EDQUOT;
720f1e20
WS
1757 goto out;
1758 }
bed92eae
AJ
1759
1760 list_for_each_entry(glist, &qg->groups, next_group) {
1e8f9158
WS
1761 ret = ulist_add(fs_info->qgroup_ulist,
1762 glist->group->qgroupid,
3c97185c
WS
1763 (uintptr_t)glist->group, GFP_ATOMIC);
1764 if (ret < 0)
1765 goto out;
bed92eae
AJ
1766 }
1767 }
3c97185c 1768 ret = 0;
bed92eae
AJ
1769 /*
1770 * no limits exceeded, now record the reservation into all qgroups
1771 */
1772 ULIST_ITER_INIT(&uiter);
1e8f9158 1773 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
bed92eae
AJ
1774 struct btrfs_qgroup *qg;
1775
995e01b7 1776 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
bed92eae
AJ
1777
1778 qg->reserved += num_bytes;
1779 }
1780
1781out:
1782 spin_unlock(&fs_info->qgroup_lock);
bed92eae
AJ
1783 return ret;
1784}
1785
1786void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1787{
1788 struct btrfs_root *quota_root;
1789 struct btrfs_qgroup *qgroup;
1790 struct btrfs_fs_info *fs_info = root->fs_info;
bed92eae
AJ
1791 struct ulist_node *unode;
1792 struct ulist_iterator uiter;
1793 u64 ref_root = root->root_key.objectid;
3c97185c 1794 int ret = 0;
bed92eae
AJ
1795
1796 if (!is_fstree(ref_root))
1797 return;
1798
1799 if (num_bytes == 0)
1800 return;
1801
1802 spin_lock(&fs_info->qgroup_lock);
1803
1804 quota_root = fs_info->quota_root;
1805 if (!quota_root)
1806 goto out;
1807
1808 qgroup = find_qgroup_rb(fs_info, ref_root);
1809 if (!qgroup)
1810 goto out;
1811
1e8f9158
WS
1812 ulist_reinit(fs_info->qgroup_ulist);
1813 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
3c97185c
WS
1814 (uintptr_t)qgroup, GFP_ATOMIC);
1815 if (ret < 0)
1816 goto out;
bed92eae 1817 ULIST_ITER_INIT(&uiter);
1e8f9158 1818 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
bed92eae
AJ
1819 struct btrfs_qgroup *qg;
1820 struct btrfs_qgroup_list *glist;
1821
995e01b7 1822 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
bed92eae
AJ
1823
1824 qg->reserved -= num_bytes;
1825
1826 list_for_each_entry(glist, &qg->groups, next_group) {
1e8f9158
WS
1827 ret = ulist_add(fs_info->qgroup_ulist,
1828 glist->group->qgroupid,
3c97185c
WS
1829 (uintptr_t)glist->group, GFP_ATOMIC);
1830 if (ret < 0)
1831 goto out;
bed92eae
AJ
1832 }
1833 }
1834
1835out:
1836 spin_unlock(&fs_info->qgroup_lock);
bed92eae
AJ
1837}
1838
1839void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1840{
1841 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1842 return;
fc36ed7e 1843 pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
bed92eae 1844 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
fc36ed7e
JS
1845 (u32)(trans->delayed_ref_elem.seq >> 32),
1846 (u32)trans->delayed_ref_elem.seq);
bed92eae
AJ
1847 BUG();
1848}
2f232036
JS
1849
1850/*
1851 * returns < 0 on error, 0 when more leafs are to be scanned.
1852 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
1853 */
1854static int
b382a324 1855qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2f232036
JS
1856 struct btrfs_trans_handle *trans, struct ulist *tmp,
1857 struct extent_buffer *scratch_leaf)
1858{
1859 struct btrfs_key found;
2f232036
JS
1860 struct ulist *roots = NULL;
1861 struct ulist_node *unode;
1862 struct ulist_iterator uiter;
1863 struct seq_list tree_mod_seq_elem = {};
1864 u64 seq;
1865 int slot;
1866 int ret;
1867
1868 path->leave_spinning = 1;
1869 mutex_lock(&fs_info->qgroup_rescan_lock);
1870 ret = btrfs_search_slot_for_read(fs_info->extent_root,
1871 &fs_info->qgroup_rescan_progress,
1872 path, 1, 0);
1873
1874 pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
c1c9ff7c 1875 fs_info->qgroup_rescan_progress.objectid,
2f232036 1876 fs_info->qgroup_rescan_progress.type,
c1c9ff7c 1877 fs_info->qgroup_rescan_progress.offset, ret);
2f232036
JS
1878
1879 if (ret) {
1880 /*
1881 * The rescan is about to end, we will not be scanning any
1882 * further blocks. We cannot unset the RESCAN flag here, because
1883 * we want to commit the transaction if everything went well.
1884 * To make the live accounting work in this phase, we set our
1885 * scan progress pointer such that every real extent objectid
1886 * will be smaller.
1887 */
1888 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
1889 btrfs_release_path(path);
1890 mutex_unlock(&fs_info->qgroup_rescan_lock);
1891 return ret;
1892 }
1893
1894 btrfs_item_key_to_cpu(path->nodes[0], &found,
1895 btrfs_header_nritems(path->nodes[0]) - 1);
1896 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
1897
1898 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1899 memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
1900 slot = path->slots[0];
1901 btrfs_release_path(path);
1902 mutex_unlock(&fs_info->qgroup_rescan_lock);
1903
1904 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
1905 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
1906 if (found.type != BTRFS_EXTENT_ITEM_KEY)
1907 continue;
1908 ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
1909 tree_mod_seq_elem.seq, &roots);
1910 if (ret < 0)
1911 goto out;
1912 spin_lock(&fs_info->qgroup_lock);
1913 seq = fs_info->qgroup_seq;
1914 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1915
1916 ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
1917 if (ret) {
1918 spin_unlock(&fs_info->qgroup_lock);
1919 ulist_free(roots);
1920 goto out;
1921 }
1922
1923 /*
1924 * step2 of btrfs_qgroup_account_ref works from a single root,
1925 * we're doing all at once here.
1926 */
1927 ulist_reinit(tmp);
1928 ULIST_ITER_INIT(&uiter);
1929 while ((unode = ulist_next(roots, &uiter))) {
1930 struct btrfs_qgroup *qg;
1931
1932 qg = find_qgroup_rb(fs_info, unode->val);
1933 if (!qg)
1934 continue;
1935
1936 ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
1937 GFP_ATOMIC);
1938 if (ret < 0) {
1939 spin_unlock(&fs_info->qgroup_lock);
1940 ulist_free(roots);
1941 goto out;
1942 }
1943 }
1944
1945 /* this loop is similar to step 2 of btrfs_qgroup_account_ref */
1946 ULIST_ITER_INIT(&uiter);
1947 while ((unode = ulist_next(tmp, &uiter))) {
1948 struct btrfs_qgroup *qg;
1949 struct btrfs_qgroup_list *glist;
1950
1951 qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
1952 qg->rfer += found.offset;
1953 qg->rfer_cmpr += found.offset;
1954 WARN_ON(qg->tag >= seq);
1955 if (qg->refcnt - seq == roots->nnodes) {
1956 qg->excl += found.offset;
1957 qg->excl_cmpr += found.offset;
1958 }
1959 qgroup_dirty(fs_info, qg);
1960
1961 list_for_each_entry(glist, &qg->groups, next_group) {
1962 ret = ulist_add(tmp, glist->group->qgroupid,
1963 (uintptr_t)glist->group,
1964 GFP_ATOMIC);
1965 if (ret < 0) {
1966 spin_unlock(&fs_info->qgroup_lock);
1967 ulist_free(roots);
1968 goto out;
1969 }
1970 }
1971 }
1972
1973 spin_unlock(&fs_info->qgroup_lock);
1974 ulist_free(roots);
1975 ret = 0;
1976 }
1977
1978out:
1979 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1980
1981 return ret;
1982}
1983
1984static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
1985{
b382a324
JS
1986 struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
1987 qgroup_rescan_work);
2f232036
JS
1988 struct btrfs_path *path;
1989 struct btrfs_trans_handle *trans = NULL;
2f232036
JS
1990 struct ulist *tmp = NULL;
1991 struct extent_buffer *scratch_leaf = NULL;
1992 int err = -ENOMEM;
1993
1994 path = btrfs_alloc_path();
1995 if (!path)
1996 goto out;
1997 tmp = ulist_alloc(GFP_NOFS);
1998 if (!tmp)
1999 goto out;
2000 scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2001 if (!scratch_leaf)
2002 goto out;
2003
2004 err = 0;
2005 while (!err) {
2006 trans = btrfs_start_transaction(fs_info->fs_root, 0);
2007 if (IS_ERR(trans)) {
2008 err = PTR_ERR(trans);
2009 break;
2010 }
2011 if (!fs_info->quota_enabled) {
2012 err = -EINTR;
2013 } else {
b382a324 2014 err = qgroup_rescan_leaf(fs_info, path, trans,
2f232036
JS
2015 tmp, scratch_leaf);
2016 }
2017 if (err > 0)
2018 btrfs_commit_transaction(trans, fs_info->fs_root);
2019 else
2020 btrfs_end_transaction(trans, fs_info->fs_root);
2021 }
2022
2023out:
2024 kfree(scratch_leaf);
2025 ulist_free(tmp);
2026 btrfs_free_path(path);
2f232036
JS
2027
2028 mutex_lock(&fs_info->qgroup_rescan_lock);
2029 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2030
2031 if (err == 2 &&
2032 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2033 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2034 } else if (err < 0) {
2035 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2036 }
2037 mutex_unlock(&fs_info->qgroup_rescan_lock);
2038
2039 if (err >= 0) {
2040 pr_info("btrfs: qgroup scan completed%s\n",
2041 err == 2 ? " (inconsistency flag cleared)" : "");
2042 } else {
2043 pr_err("btrfs: qgroup scan failed with %d\n", err);
2044 }
57254b6e
JS
2045
2046 complete_all(&fs_info->qgroup_rescan_completion);
2f232036
JS
2047}
2048
b382a324
JS
2049/*
2050 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2051 * memory required for the rescan context.
2052 */
2053static int
2054qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2055 int init_flags)
2f232036
JS
2056{
2057 int ret = 0;
2f232036 2058
b382a324
JS
2059 if (!init_flags &&
2060 (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2061 !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2062 ret = -EINVAL;
2063 goto err;
2064 }
2f232036
JS
2065
2066 mutex_lock(&fs_info->qgroup_rescan_lock);
2067 spin_lock(&fs_info->qgroup_lock);
b382a324
JS
2068
2069 if (init_flags) {
2070 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2071 ret = -EINPROGRESS;
2072 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2073 ret = -EINVAL;
2074
2075 if (ret) {
2076 spin_unlock(&fs_info->qgroup_lock);
2077 mutex_unlock(&fs_info->qgroup_rescan_lock);
2078 goto err;
2079 }
2080
2081 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2f232036
JS
2082 }
2083
2f232036
JS
2084 memset(&fs_info->qgroup_rescan_progress, 0,
2085 sizeof(fs_info->qgroup_rescan_progress));
b382a324
JS
2086 fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2087
2088 spin_unlock(&fs_info->qgroup_lock);
2089 mutex_unlock(&fs_info->qgroup_rescan_lock);
2090
57254b6e 2091 init_completion(&fs_info->qgroup_rescan_completion);
2f232036 2092
b382a324
JS
2093 memset(&fs_info->qgroup_rescan_work, 0,
2094 sizeof(fs_info->qgroup_rescan_work));
2095 fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker;
2096
2097 if (ret) {
2098err:
2099 pr_info("btrfs: qgroup_rescan_init failed with %d\n", ret);
2100 return ret;
2101 }
2102
2103 return 0;
2104}
2105
2106static void
2107qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2108{
2109 struct rb_node *n;
2110 struct btrfs_qgroup *qgroup;
2111
2112 spin_lock(&fs_info->qgroup_lock);
2f232036
JS
2113 /* clear all current qgroup tracking information */
2114 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2115 qgroup = rb_entry(n, struct btrfs_qgroup, node);
2116 qgroup->rfer = 0;
2117 qgroup->rfer_cmpr = 0;
2118 qgroup->excl = 0;
2119 qgroup->excl_cmpr = 0;
2120 }
2121 spin_unlock(&fs_info->qgroup_lock);
b382a324 2122}
2f232036 2123
b382a324
JS
2124int
2125btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2126{
2127 int ret = 0;
2128 struct btrfs_trans_handle *trans;
2129
2130 ret = qgroup_rescan_init(fs_info, 0, 1);
2131 if (ret)
2132 return ret;
2133
2134 /*
2135 * We have set the rescan_progress to 0, which means no more
2136 * delayed refs will be accounted by btrfs_qgroup_account_ref.
2137 * However, btrfs_qgroup_account_ref may be right after its call
2138 * to btrfs_find_all_roots, in which case it would still do the
2139 * accounting.
2140 * To solve this, we're committing the transaction, which will
2141 * ensure we run all delayed refs and only after that, we are
2142 * going to clear all tracking information for a clean start.
2143 */
2144
2145 trans = btrfs_join_transaction(fs_info->fs_root);
2146 if (IS_ERR(trans)) {
2147 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2148 return PTR_ERR(trans);
2149 }
2150 ret = btrfs_commit_transaction(trans, fs_info->fs_root);
2151 if (ret) {
2152 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2153 return ret;
2154 }
2155
2156 qgroup_rescan_zero_tracking(fs_info);
2157
2158 btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
2159 &fs_info->qgroup_rescan_work);
2f232036
JS
2160
2161 return 0;
2162}
57254b6e
JS
2163
2164int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2165{
2166 int running;
2167 int ret = 0;
2168
2169 mutex_lock(&fs_info->qgroup_rescan_lock);
2170 spin_lock(&fs_info->qgroup_lock);
2171 running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2172 spin_unlock(&fs_info->qgroup_lock);
2173 mutex_unlock(&fs_info->qgroup_rescan_lock);
2174
2175 if (running)
2176 ret = wait_for_completion_interruptible(
2177 &fs_info->qgroup_rescan_completion);
2178
2179 return ret;
2180}
b382a324
JS
2181
2182/*
2183 * this is only called from open_ctree where we're still single threaded, thus
2184 * locking is omitted here.
2185 */
2186void
2187btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2188{
2189 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2190 btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
2191 &fs_info->qgroup_rescan_work);
2192}