btrfs: qgroup: Split meta rsv type into meta_prealloc and meta_pertrans
[linux-2.6-block.git] / fs / btrfs / qgroup.c
CommitLineData
bed92eae
AJ
1/*
2 * Copyright (C) 2011 STRATO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
55e301fd 26#include <linux/btrfs.h>
bed92eae
AJ
27
28#include "ctree.h"
29#include "transaction.h"
30#include "disk-io.h"
31#include "locking.h"
32#include "ulist.h"
bed92eae 33#include "backref.h"
2f232036 34#include "extent_io.h"
fcebe456 35#include "qgroup.h"
bed92eae 36
e69bcee3 37
bed92eae
AJ
38/* TODO XXX FIXME
39 * - subvol delete -> delete when ref goes to 0? delete limits also?
40 * - reorganize keys
41 * - compressed
42 * - sync
bed92eae
AJ
43 * - copy also limits on subvol creation
44 * - limit
45 * - caches fuer ulists
46 * - performance benchmarks
47 * - check all ioctl parameters
48 */
49
f59c0347
QW
50/*
51 * Helpers to access qgroup reservation
52 *
53 * Callers should ensure the lock context and type are valid
54 */
55
56static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup)
57{
58 u64 ret = 0;
59 int i;
60
61 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
62 ret += qgroup->rsv.values[i];
63
64 return ret;
65}
66
#ifdef CONFIG_BTRFS_DEBUG
/* Human-readable name of a qgroup reservation type, NULL for unknown types. */
static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type)
{
	switch (type) {
	case BTRFS_QGROUP_RSV_DATA:
		return "data";
	case BTRFS_QGROUP_RSV_META_PERTRANS:
		return "meta_pertrans";
	case BTRFS_QGROUP_RSV_META_PREALLOC:
		return "meta_prealloc";
	default:
		return NULL;
	}
}
#endif
79
/*
 * Add @num_bytes to the @type reservation of @qgroup.
 *
 * Locking is the caller's responsibility (see the reservation helper
 * section comment earlier in this file).
 */
static void qgroup_rsv_add(struct btrfs_fs_info *fs_info,
			   struct btrfs_qgroup *qgroup, u64 num_bytes,
			   enum btrfs_qgroup_rsv_type type)
{
	/* Tracepoint fires before the counter is updated */
	trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type);
	qgroup->rsv.values[type] += num_bytes;
}
87
/*
 * Release @num_bytes from the @type reservation of @qgroup.
 *
 * If the recorded reservation is smaller than @num_bytes, the counter is
 * clamped to 0 instead of wrapping the u64, and a rate-limited warning is
 * printed on debug builds.
 */
static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
			       struct btrfs_qgroup *qgroup, u64 num_bytes,
			       enum btrfs_qgroup_rsv_type type)
{
	/* Trace the release as a negative delta */
	trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type);
	if (qgroup->rsv.values[type] >= num_bytes) {
		qgroup->rsv.values[type] -= num_bytes;
		return;
	}
#ifdef CONFIG_BTRFS_DEBUG
	WARN_RATELIMIT(1,
		"qgroup %llu %s reserved space underflow, have %llu to free %llu",
		qgroup->qgroupid, qgroup_rsv_type_str(type),
		qgroup->rsv.values[type], num_bytes);
#endif
	/* Underflow: clamp rather than wrap around */
	qgroup->rsv.values[type] = 0;
}
105
64ee4e75
QW
106static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
107 struct btrfs_qgroup *dest,
108 struct btrfs_qgroup *src)
f59c0347
QW
109{
110 int i;
111
112 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
64ee4e75 113 qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i);
f59c0347
QW
114}
115
64ee4e75
QW
116static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info,
117 struct btrfs_qgroup *dest,
f59c0347
QW
118 struct btrfs_qgroup *src)
119{
120 int i;
121
122 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
64ee4e75 123 qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i);
f59c0347
QW
124}
125
9c542136
QW
126static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
127 int mod)
128{
129 if (qg->old_refcnt < seq)
130 qg->old_refcnt = seq;
131 qg->old_refcnt += mod;
132}
133
134static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
135 int mod)
136{
137 if (qg->new_refcnt < seq)
138 qg->new_refcnt = seq;
139 qg->new_refcnt += mod;
140}
141
142static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
143{
144 if (qg->old_refcnt < seq)
145 return 0;
146 return qg->old_refcnt - seq;
147}
148
149static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
150{
151 if (qg->new_refcnt < seq)
152 return 0;
153 return qg->new_refcnt - seq;
154}
155
/*
 * glue structure to represent the relations between qgroups.
 */
struct btrfs_qgroup_list {
	/* Linked into member->groups: iterates the parents of @member */
	struct list_head next_group;
	/* Linked into group->members: iterates the members of @group */
	struct list_head next_member;
	/* The parent side of this relation */
	struct btrfs_qgroup *group;
	/* The child side of this relation */
	struct btrfs_qgroup *member;
};
165
ef2fff64
DS
166static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg)
167{
168 return (u64)(uintptr_t)qg;
169}
170
171static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n)
172{
173 return (struct btrfs_qgroup *)(uintptr_t)n->aux;
174}
fcebe456 175
b382a324
JS
176static int
177qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
178 int init_flags);
179static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
2f232036 180
58400fce 181/* must be called with qgroup_ioctl_lock held */
bed92eae
AJ
182static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
183 u64 qgroupid)
184{
185 struct rb_node *n = fs_info->qgroup_tree.rb_node;
186 struct btrfs_qgroup *qgroup;
187
188 while (n) {
189 qgroup = rb_entry(n, struct btrfs_qgroup, node);
190 if (qgroup->qgroupid < qgroupid)
191 n = n->rb_left;
192 else if (qgroup->qgroupid > qgroupid)
193 n = n->rb_right;
194 else
195 return qgroup;
196 }
197 return NULL;
198}
199
200/* must be called with qgroup_lock held */
201static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
202 u64 qgroupid)
203{
204 struct rb_node **p = &fs_info->qgroup_tree.rb_node;
205 struct rb_node *parent = NULL;
206 struct btrfs_qgroup *qgroup;
207
208 while (*p) {
209 parent = *p;
210 qgroup = rb_entry(parent, struct btrfs_qgroup, node);
211
212 if (qgroup->qgroupid < qgroupid)
213 p = &(*p)->rb_left;
214 else if (qgroup->qgroupid > qgroupid)
215 p = &(*p)->rb_right;
216 else
217 return qgroup;
218 }
219
220 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
221 if (!qgroup)
222 return ERR_PTR(-ENOMEM);
223
224 qgroup->qgroupid = qgroupid;
225 INIT_LIST_HEAD(&qgroup->groups);
226 INIT_LIST_HEAD(&qgroup->members);
227 INIT_LIST_HEAD(&qgroup->dirty);
228
229 rb_link_node(&qgroup->node, parent, p);
230 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
231
232 return qgroup;
233}
234
4082bd3d 235static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
bed92eae 236{
bed92eae
AJ
237 struct btrfs_qgroup_list *list;
238
bed92eae 239 list_del(&qgroup->dirty);
bed92eae
AJ
240 while (!list_empty(&qgroup->groups)) {
241 list = list_first_entry(&qgroup->groups,
242 struct btrfs_qgroup_list, next_group);
243 list_del(&list->next_group);
244 list_del(&list->next_member);
245 kfree(list);
246 }
247
248 while (!list_empty(&qgroup->members)) {
249 list = list_first_entry(&qgroup->members,
250 struct btrfs_qgroup_list, next_member);
251 list_del(&list->next_group);
252 list_del(&list->next_member);
253 kfree(list);
254 }
255 kfree(qgroup);
4082bd3d 256}
bed92eae 257
/* must be called with qgroup_lock held */
static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);

	/* Nothing to delete if the qgroup is not tracked in memory */
	if (!qgroup)
		return -ENOENT;

	/* Unlink from the rbtree, then free the qgroup and its relations */
	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
	__del_qgroup_rb(qgroup);
	return 0;
}
270
271/* must be called with qgroup_lock held */
272static int add_relation_rb(struct btrfs_fs_info *fs_info,
273 u64 memberid, u64 parentid)
274{
275 struct btrfs_qgroup *member;
276 struct btrfs_qgroup *parent;
277 struct btrfs_qgroup_list *list;
278
279 member = find_qgroup_rb(fs_info, memberid);
280 parent = find_qgroup_rb(fs_info, parentid);
281 if (!member || !parent)
282 return -ENOENT;
283
284 list = kzalloc(sizeof(*list), GFP_ATOMIC);
285 if (!list)
286 return -ENOMEM;
287
288 list->group = parent;
289 list->member = member;
290 list_add_tail(&list->next_group, &member->groups);
291 list_add_tail(&list->next_member, &parent->members);
292
293 return 0;
294}
295
296/* must be called with qgroup_lock held */
297static int del_relation_rb(struct btrfs_fs_info *fs_info,
298 u64 memberid, u64 parentid)
299{
300 struct btrfs_qgroup *member;
301 struct btrfs_qgroup *parent;
302 struct btrfs_qgroup_list *list;
303
304 member = find_qgroup_rb(fs_info, memberid);
305 parent = find_qgroup_rb(fs_info, parentid);
306 if (!member || !parent)
307 return -ENOENT;
308
309 list_for_each_entry(list, &member->groups, next_group) {
310 if (list->group == parent) {
311 list_del(&list->next_group);
312 list_del(&list->next_member);
313 kfree(list);
314 return 0;
315 }
316 }
317 return -ENOENT;
318}
319
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test helper: check that the in-memory qgroup @qgroupid exists and
 * carries exactly the expected referenced/exclusive byte counts.
 */
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl)
{
	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);

	if (!qgroup || qgroup->rfer != rfer || qgroup->excl != excl)
		return -EINVAL;
	return 0;
}
#endif
334
/*
 * The full config is read in one go, only called from open_ctree()
 * It doesn't use any locking, as at this point we're still single-threaded
 *
 * Pass 1 loads the status item plus all qgroup info/limit items into the
 * in-memory rbtree, pass 2 wires up all qgroup relations.  On any failure
 * the partially built state is torn down again and quota is left disabled.
 */
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_path *path = NULL;
	struct extent_buffer *l;
	int slot;
	int ret = 0;
	u64 flags = 0;
	u64 rescan_progress = 0;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* default this to quota off, in case no status key is found */
	fs_info->qgroup_flags = 0;

	/*
	 * pass 1: read status, all qgroup infos and limits
	 */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
	if (ret)
		goto out;

	while (1) {
		struct btrfs_qgroup *qgroup;

		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
			struct btrfs_qgroup_status_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_status_item);

			/* An unknown on-disk version means quota stays off */
			if (btrfs_qgroup_status_version(l, ptr) !=
			    BTRFS_QGROUP_STATUS_VERSION) {
				btrfs_err(fs_info,
				 "old qgroup version, quota disabled");
				goto out;
			}
			/*
			 * A generation mismatch means the numbers were not
			 * updated by the last transaction; mark inconsistent.
			 */
			if (btrfs_qgroup_status_generation(l, ptr) !=
			    fs_info->generation) {
				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				btrfs_err(fs_info,
					"qgroup generation mismatch, marked as inconsistent");
			}
			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
									  ptr);
			rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
			goto next1;
		}

		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
			goto next1;

		/*
		 * The info item sorts before the limit item for the same
		 * qgroup, so seeing an info item twice (or a limit item
		 * first) indicates a broken config.
		 */
		qgroup = find_qgroup_rb(fs_info, found_key.offset);
		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
			btrfs_err(fs_info, "inconsistent qgroup config");
			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		}
		if (!qgroup) {
			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out;
			}
		}
		switch (found_key.type) {
		case BTRFS_QGROUP_INFO_KEY: {
			struct btrfs_qgroup_info_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_info_item);
			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
			/* generation currently unused */
			break;
		}
		case BTRFS_QGROUP_LIMIT_KEY: {
			struct btrfs_qgroup_limit_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_limit_item);
			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
			break;
		}
		}
next1:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
	btrfs_release_path(path);

	/*
	 * pass 2: read all qgroup relations
	 */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
	if (ret)
		goto out;
	while (1) {
		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
			goto next2;

		if (found_key.objectid > found_key.offset) {
			/* parent <- member, not needed to build config */
			/* FIXME should we omit the key completely? */
			goto next2;
		}

		ret = add_relation_rb(fs_info, found_key.objectid,
				      found_key.offset);
		if (ret == -ENOENT) {
			/* One end of the relation no longer exists */
			btrfs_warn(fs_info,
				"orphan qgroup relation 0x%llx->0x%llx",
				found_key.objectid, found_key.offset);
			ret = 0;	/* ignore the error */
		}
		if (ret)
			goto out;
next2:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
out:
	fs_info->qgroup_flags |= flags;
	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
		clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
		 ret >= 0)
		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
	btrfs_free_path(path);

	/* On failure, drop the ulist and any pending rescan state */
	if (ret < 0) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	return ret < 0 ? ret : 0;
}
519
520/*
e685da14
WS
521 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
522 * first two are in single-threaded paths.And for the third one, we have set
523 * quota_root to be null with qgroup_lock held before, so it is safe to clean
524 * up the in-memory structures without qgroup_lock held.
bed92eae
AJ
525 */
526void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
527{
528 struct rb_node *n;
529 struct btrfs_qgroup *qgroup;
bed92eae
AJ
530
531 while ((n = rb_first(&fs_info->qgroup_tree))) {
532 qgroup = rb_entry(n, struct btrfs_qgroup, node);
533 rb_erase(n, &fs_info->qgroup_tree);
4082bd3d 534 __del_qgroup_rb(qgroup);
bed92eae 535 }
1e7bac1e
WS
536 /*
537 * we call btrfs_free_qgroup_config() when umounting
01327610 538 * filesystem and disabling quota, so we set qgroup_ulist
1e7bac1e
WS
539 * to be null here to avoid double free.
540 */
1e8f9158 541 ulist_free(fs_info->qgroup_ulist);
1e7bac1e 542 fs_info->qgroup_ulist = NULL;
bed92eae
AJ
543}
544
545static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
546 struct btrfs_root *quota_root,
547 u64 src, u64 dst)
548{
549 int ret;
550 struct btrfs_path *path;
551 struct btrfs_key key;
552
553 path = btrfs_alloc_path();
554 if (!path)
555 return -ENOMEM;
556
557 key.objectid = src;
558 key.type = BTRFS_QGROUP_RELATION_KEY;
559 key.offset = dst;
560
561 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
562
563 btrfs_mark_buffer_dirty(path->nodes[0]);
564
565 btrfs_free_path(path);
566 return ret;
567}
568
569static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
570 struct btrfs_root *quota_root,
571 u64 src, u64 dst)
572{
573 int ret;
574 struct btrfs_path *path;
575 struct btrfs_key key;
576
577 path = btrfs_alloc_path();
578 if (!path)
579 return -ENOMEM;
580
581 key.objectid = src;
582 key.type = BTRFS_QGROUP_RELATION_KEY;
583 key.offset = dst;
584
585 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
586 if (ret < 0)
587 goto out;
588
589 if (ret > 0) {
590 ret = -ENOENT;
591 goto out;
592 }
593
594 ret = btrfs_del_item(trans, quota_root, path);
595out:
596 btrfs_free_path(path);
597 return ret;
598}
599
/*
 * Create the on-disk items for qgroup @qgroupid: a zeroed info item and a
 * zeroed limit item.  Existing items are re-initialized rather than
 * treated as an error.
 */
static int add_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_qgroup_info_item *qgroup_info;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	/* Sanity-test runs have no real quota tree to write to */
	if (btrfs_is_testing(quota_root->fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;

	/*
	 * Avoid a transaction abort by catching -EEXIST here. In that
	 * case, we proceed by re-initializing the existing structure
	 * on disk.
	 */

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_info));
	if (ret && ret != -EEXIST)
		goto out;

	/* Zero all counters of the (possibly pre-existing) info item */
	leaf = path->nodes[0];
	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
				 struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);

	btrfs_mark_buffer_dirty(leaf);

	btrfs_release_path(path);

	/* Same key objectid/offset, just a different item type */
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_limit));
	if (ret && ret != -EEXIST)
		goto out;

	/* Clear all limits of the (possibly pre-existing) limit item */
	leaf = path->nodes[0];
	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
				  struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);

	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
667
668static int del_qgroup_item(struct btrfs_trans_handle *trans,
669 struct btrfs_root *quota_root, u64 qgroupid)
670{
671 int ret;
672 struct btrfs_path *path;
673 struct btrfs_key key;
674
675 path = btrfs_alloc_path();
676 if (!path)
677 return -ENOMEM;
678
679 key.objectid = 0;
680 key.type = BTRFS_QGROUP_INFO_KEY;
681 key.offset = qgroupid;
682 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
683 if (ret < 0)
684 goto out;
685
686 if (ret > 0) {
687 ret = -ENOENT;
688 goto out;
689 }
690
691 ret = btrfs_del_item(trans, quota_root, path);
692 if (ret)
693 goto out;
694
695 btrfs_release_path(path);
696
697 key.type = BTRFS_QGROUP_LIMIT_KEY;
698 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
699 if (ret < 0)
700 goto out;
701
702 if (ret > 0) {
703 ret = -ENOENT;
704 goto out;
705 }
706
707 ret = btrfs_del_item(trans, quota_root, path);
708
709out:
710 btrfs_free_path(path);
711 return ret;
712}
713
/*
 * Write the in-memory limit configuration of @qgroup back into its on-disk
 * limit item.  Returns -ENOENT if the item does not exist.
 */
static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_qgroup *qgroup)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	int ret;
	int slot;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	key.offset = qgroup->qgroupid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;	/* item must already exist */

	if (ret)
		goto out;

	/* Copy all limit fields from the in-memory qgroup to the leaf */
	l = path->nodes[0];
	slot = path->slots[0];
	qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}
755
/*
 * Write the in-memory accounting numbers of @qgroup back into its on-disk
 * info item.  Returns -ENOENT if the item does not exist.
 */
static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_qgroup *qgroup)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_info_item *qgroup_info;
	int ret;
	int slot;

	/* Sanity-test runs have no real quota tree to write to */
	if (btrfs_is_testing(root->fs_info))
		return 0;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroup->qgroupid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;	/* item must already exist */

	if (ret)
		goto out;

	/* Copy all counters from the in-memory qgroup to the leaf */
	l = path->nodes[0];
	slot = path->slots[0];
	qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}
800
/*
 * Write the global qgroup state (flags, generation, rescan progress) back
 * into the on-disk status item.  Returns -ENOENT if the item is missing.
 */
static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info,
				     struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_status_item *ptr;
	int ret;
	int slot;

	/* The status item always lives at key (0, STATUS, 0) */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;	/* item must already exist */

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
	btrfs_set_qgroup_status_rescan(l, ptr,
				fs_info->qgroup_rescan_progress.objectid);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}
841
/*
 * called with qgroup_lock held
 *
 * Empty the whole quota tree by repeatedly deleting all items of its first
 * leaf until no items remain.
 */
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf = NULL;
	int ret;
	int nr = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;

	/* Key (0, 0, 0) sorts before every item in the tree */
	key.objectid = 0;
	key.offset = 0;
	key.type = 0;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		nr = btrfs_header_nritems(leaf);
		if (!nr)
			break;
		/*
		 * delete the leaf one by one
		 * since the whole tree is going
		 * to be deleted.
		 */
		path->slots[0] = 0;
		ret = btrfs_del_items(trans, root, path, 0, nr);
		if (ret)
			goto out;

		btrfs_release_path(path);
	}
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
889
/*
 * Enable qgroup accounting: create the quota tree with its status item,
 * create one qgroup item per existing subvolume plus the top level FS tree,
 * publish quota_root, and kick off the initial rescan.
 *
 * Serialized against other quota ioctls by qgroup_ioctl_lock.
 */
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path = NULL;
	struct btrfs_qgroup_status_item *ptr;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_qgroup *qgroup = NULL;
	int ret = 0;
	int slot;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	/* Already enabled, nothing to do */
	if (fs_info->quota_root)
		goto out;

	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * initially create the quota tree
	 */
	quota_root = btrfs_create_tree(trans, fs_info,
				       BTRFS_QUOTA_TREE_OBJECTID);
	if (IS_ERR(quota_root)) {
		ret = PTR_ERR(quota_root);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out_free_root;
	}

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*ptr));
	if (ret)
		goto out_free_path;

	/* Start out inconsistent; the rescan below makes the numbers valid */
	leaf = path->nodes[0];
	ptr = btrfs_item_ptr(leaf, path->slots[0],
				 struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

	btrfs_mark_buffer_dirty(leaf);

	/* Scan root refs to create a qgroup for every existing subvolume */
	key.objectid = 0;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = 0;

	btrfs_release_path(path);
	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
	if (ret > 0)
		goto out_add_root;
	if (ret < 0)
		goto out_free_path;


	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.type == BTRFS_ROOT_REF_KEY) {
			ret = add_qgroup_item(trans, quota_root,
					      found_key.offset);
			if (ret)
				goto out_free_path;

			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out_free_path;
			}
		}
		ret = btrfs_next_item(tree_root, path);
		if (ret < 0)
			goto out_free_path;
		if (ret)
			break;
	}

out_add_root:
	/* The top level FS tree has no root ref; add its qgroup explicitly */
	btrfs_release_path(path);
	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
	if (ret)
		goto out_free_path;

	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
	if (IS_ERR(qgroup)) {
		ret = PTR_ERR(qgroup);
		goto out_free_path;
	}
	/* Publish the new quota root under qgroup_lock */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_root = quota_root;
	set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	spin_unlock(&fs_info->qgroup_lock);
	/* Queue the initial rescan to compute consistent numbers */
	ret = qgroup_rescan_init(fs_info, 0, 1);
	if (!ret) {
		qgroup_rescan_zero_tracking(fs_info);
		btrfs_queue_work(fs_info->qgroup_rescan_workers,
				 &fs_info->qgroup_rescan_work);
	}

out_free_path:
	btrfs_free_path(path);
out_free_root:
	if (ret) {
		free_extent_buffer(quota_root->node);
		free_extent_buffer(quota_root->commit_root);
		kfree(quota_root);
	}
out:
	if (ret) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
	}
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1025
/*
 * Disable qgroup accounting: unpublish quota_root, tear down the in-memory
 * config, empty the quota tree and delete its root.
 *
 * Serialized against other quota ioctls by qgroup_ioctl_lock.
 */
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	/* Already disabled, nothing to do */
	if (!fs_info->quota_root)
		goto out;
	/* Stop new accounting, then wait for a running rescan to finish */
	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	btrfs_qgroup_wait_for_completion(fs_info, false);
	/* Unpublish quota_root under qgroup_lock */
	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	/* Safe without qgroup_lock now that quota_root is NULL */
	btrfs_free_qgroup_config(fs_info);

	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	ret = btrfs_del_root(trans, fs_info, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	/* Free the (now empty) quota tree's root node itself */
	btrfs_tree_lock(quota_root->node);
	clean_tree_block(fs_info, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1067
2f232036
JS
1068static void qgroup_dirty(struct btrfs_fs_info *fs_info,
1069 struct btrfs_qgroup *qgroup)
bed92eae 1070{
2f232036
JS
1071 if (list_empty(&qgroup->dirty))
1072 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
bed92eae
AJ
1073}
1074
/*
 * The easy accounting: we're updating a qgroup relationship whose child
 * qgroup only has exclusive extents.
 *
 * In this case, all exclusive extents will also be exclusive for the
 * parent, so excl/rfer just get added/removed.
 *
 * So is qgroup reservation space, which should also be added/removed to
 * the parent.
 * Or when the child tries to release reservation space, the parent will
 * underflow its reservation (for the relationship-adding case).
 *
 * Caller should hold fs_info->qgroup_lock.
 */
static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
				    struct ulist *tmp, u64 ref_root,
				    struct btrfs_qgroup *src, int sign)
{
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *glist;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	/* All of src's referenced space is exclusive, adjust by this much */
	u64 num_bytes = src->excl;
	int ret = 0;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/* @sign is +1 when adding the relationship, -1 when removing it */
	qgroup->rfer += sign * num_bytes;
	qgroup->rfer_cmpr += sign * num_bytes;

	WARN_ON(sign < 0 && qgroup->excl < num_bytes);
	qgroup->excl += sign * num_bytes;
	qgroup->excl_cmpr += sign * num_bytes;

	/* Propagate reservations the same way as the byte counters */
	if (sign > 0)
		qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
	else
		qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);

	qgroup_dirty(fs_info, qgroup);

	/* Get all of the parent groups that contain this qgroup */
	list_for_each_entry(glist, &qgroup->groups, next_group) {
		ret = ulist_add(tmp, glist->group->qgroupid,
				qgroup_to_aux(glist->group), GFP_ATOMIC);
		if (ret < 0)
			goto out;
	}

	/* Iterate all of the parents and adjust their reference counts */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qgroup = unode_aux_to_qgroup(unode);
		qgroup->rfer += sign * num_bytes;
		qgroup->rfer_cmpr += sign * num_bytes;
		WARN_ON(sign < 0 && qgroup->excl < num_bytes);
		qgroup->excl += sign * num_bytes;
		if (sign > 0)
			qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
		else
			qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
		qgroup->excl_cmpr += sign * num_bytes;
		qgroup_dirty(fs_info, qgroup);

		/* Add any parents of the parents */
		list_for_each_entry(glist, &qgroup->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					qgroup_to_aux(glist->group), GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
out:
	return ret;
}
1153
1154
1155/*
1156 * Quick path for updating qgroup with only excl refs.
1157 *
1158 * In that case, just update all parent will be enough.
1159 * Or we needs to do a full rescan.
1160 * Caller should also hold fs_info->qgroup_lock.
1161 *
1162 * Return 0 for quick update, return >0 for need to full rescan
1163 * and mark INCONSISTENT flag.
1164 * Return < 0 for other error.
1165 */
1166static int quick_update_accounting(struct btrfs_fs_info *fs_info,
1167 struct ulist *tmp, u64 src, u64 dst,
1168 int sign)
1169{
1170 struct btrfs_qgroup *qgroup;
1171 int ret = 1;
1172 int err = 0;
1173
1174 qgroup = find_qgroup_rb(fs_info, src);
1175 if (!qgroup)
1176 goto out;
1177 if (qgroup->excl == qgroup->rfer) {
1178 ret = 0;
1179 err = __qgroup_excl_accounting(fs_info, tmp, dst,
429d6275 1180 qgroup, sign);
9c8b35b1
QW
1181 if (err < 0) {
1182 ret = err;
1183 goto out;
1184 }
1185 }
1186out:
1187 if (ret)
1188 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1189 return ret;
1190}
1191
bed92eae
AJ
1192int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1193 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1194{
1195 struct btrfs_root *quota_root;
b7fef4f5
WS
1196 struct btrfs_qgroup *parent;
1197 struct btrfs_qgroup *member;
534e6623 1198 struct btrfs_qgroup_list *list;
9c8b35b1 1199 struct ulist *tmp;
bed92eae
AJ
1200 int ret = 0;
1201
8465ecec
QW
1202 /* Check the level of src and dst first */
1203 if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
1204 return -EINVAL;
1205
6602caf1 1206 tmp = ulist_alloc(GFP_KERNEL);
ab3680dd
CE
1207 if (!tmp)
1208 return -ENOMEM;
1209
f2f6ed3d 1210 mutex_lock(&fs_info->qgroup_ioctl_lock);
bed92eae 1211 quota_root = fs_info->quota_root;
f2f6ed3d
WS
1212 if (!quota_root) {
1213 ret = -EINVAL;
1214 goto out;
1215 }
b7fef4f5
WS
1216 member = find_qgroup_rb(fs_info, src);
1217 parent = find_qgroup_rb(fs_info, dst);
1218 if (!member || !parent) {
1219 ret = -EINVAL;
1220 goto out;
1221 }
bed92eae 1222
534e6623
WS
1223 /* check if such qgroup relation exist firstly */
1224 list_for_each_entry(list, &member->groups, next_group) {
1225 if (list->group == parent) {
1226 ret = -EEXIST;
1227 goto out;
1228 }
1229 }
1230
bed92eae
AJ
1231 ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1232 if (ret)
f2f6ed3d 1233 goto out;
bed92eae
AJ
1234
1235 ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1236 if (ret) {
1237 del_qgroup_relation_item(trans, quota_root, src, dst);
f2f6ed3d 1238 goto out;
bed92eae
AJ
1239 }
1240
1241 spin_lock(&fs_info->qgroup_lock);
0b246afa 1242 ret = add_relation_rb(fs_info, src, dst);
9c8b35b1
QW
1243 if (ret < 0) {
1244 spin_unlock(&fs_info->qgroup_lock);
1245 goto out;
1246 }
1247 ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
bed92eae 1248 spin_unlock(&fs_info->qgroup_lock);
f2f6ed3d
WS
1249out:
1250 mutex_unlock(&fs_info->qgroup_ioctl_lock);
9c8b35b1 1251 ulist_free(tmp);
bed92eae
AJ
1252 return ret;
1253}
1254
025db916 1255static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
bed92eae
AJ
1256 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1257{
1258 struct btrfs_root *quota_root;
534e6623
WS
1259 struct btrfs_qgroup *parent;
1260 struct btrfs_qgroup *member;
1261 struct btrfs_qgroup_list *list;
9c8b35b1 1262 struct ulist *tmp;
bed92eae
AJ
1263 int ret = 0;
1264 int err;
1265
6602caf1 1266 tmp = ulist_alloc(GFP_KERNEL);
9c8b35b1
QW
1267 if (!tmp)
1268 return -ENOMEM;
1269
bed92eae 1270 quota_root = fs_info->quota_root;
f2f6ed3d
WS
1271 if (!quota_root) {
1272 ret = -EINVAL;
1273 goto out;
1274 }
bed92eae 1275
534e6623
WS
1276 member = find_qgroup_rb(fs_info, src);
1277 parent = find_qgroup_rb(fs_info, dst);
1278 if (!member || !parent) {
1279 ret = -EINVAL;
1280 goto out;
1281 }
1282
1283 /* check if such qgroup relation exist firstly */
1284 list_for_each_entry(list, &member->groups, next_group) {
1285 if (list->group == parent)
1286 goto exist;
1287 }
1288 ret = -ENOENT;
1289 goto out;
1290exist:
bed92eae
AJ
1291 ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1292 err = del_qgroup_relation_item(trans, quota_root, dst, src);
1293 if (err && !ret)
1294 ret = err;
1295
1296 spin_lock(&fs_info->qgroup_lock);
1297 del_relation_rb(fs_info, src, dst);
9c8b35b1 1298 ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
bed92eae 1299 spin_unlock(&fs_info->qgroup_lock);
f2f6ed3d 1300out:
9c8b35b1 1301 ulist_free(tmp);
f5a6b1c5
DY
1302 return ret;
1303}
1304
1305int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1306 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1307{
1308 int ret = 0;
1309
1310 mutex_lock(&fs_info->qgroup_ioctl_lock);
1311 ret = __del_qgroup_relation(trans, fs_info, src, dst);
f2f6ed3d 1312 mutex_unlock(&fs_info->qgroup_ioctl_lock);
f5a6b1c5 1313
bed92eae
AJ
1314 return ret;
1315}
1316
1317int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
4087cf24 1318 struct btrfs_fs_info *fs_info, u64 qgroupid)
bed92eae
AJ
1319{
1320 struct btrfs_root *quota_root;
1321 struct btrfs_qgroup *qgroup;
1322 int ret = 0;
1323
f2f6ed3d 1324 mutex_lock(&fs_info->qgroup_ioctl_lock);
bed92eae 1325 quota_root = fs_info->quota_root;
f2f6ed3d
WS
1326 if (!quota_root) {
1327 ret = -EINVAL;
1328 goto out;
1329 }
534e6623
WS
1330 qgroup = find_qgroup_rb(fs_info, qgroupid);
1331 if (qgroup) {
1332 ret = -EEXIST;
1333 goto out;
1334 }
bed92eae
AJ
1335
1336 ret = add_qgroup_item(trans, quota_root, qgroupid);
534e6623
WS
1337 if (ret)
1338 goto out;
bed92eae
AJ
1339
1340 spin_lock(&fs_info->qgroup_lock);
1341 qgroup = add_qgroup_rb(fs_info, qgroupid);
1342 spin_unlock(&fs_info->qgroup_lock);
1343
1344 if (IS_ERR(qgroup))
1345 ret = PTR_ERR(qgroup);
f2f6ed3d
WS
1346out:
1347 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1348 return ret;
1349}
1350
1351int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1352 struct btrfs_fs_info *fs_info, u64 qgroupid)
1353{
1354 struct btrfs_root *quota_root;
2cf68703 1355 struct btrfs_qgroup *qgroup;
f5a6b1c5 1356 struct btrfs_qgroup_list *list;
bed92eae
AJ
1357 int ret = 0;
1358
f2f6ed3d 1359 mutex_lock(&fs_info->qgroup_ioctl_lock);
bed92eae 1360 quota_root = fs_info->quota_root;
f2f6ed3d
WS
1361 if (!quota_root) {
1362 ret = -EINVAL;
1363 goto out;
1364 }
bed92eae 1365
2cf68703 1366 qgroup = find_qgroup_rb(fs_info, qgroupid);
534e6623
WS
1367 if (!qgroup) {
1368 ret = -ENOENT;
1369 goto out;
1370 } else {
f5a6b1c5
DY
1371 /* check if there are no children of this qgroup */
1372 if (!list_empty(&qgroup->members)) {
f2f6ed3d
WS
1373 ret = -EBUSY;
1374 goto out;
2cf68703
AJ
1375 }
1376 }
bed92eae 1377 ret = del_qgroup_item(trans, quota_root, qgroupid);
36b96fdc
SD
1378 if (ret && ret != -ENOENT)
1379 goto out;
bed92eae 1380
f5a6b1c5
DY
1381 while (!list_empty(&qgroup->groups)) {
1382 list = list_first_entry(&qgroup->groups,
1383 struct btrfs_qgroup_list, next_group);
1384 ret = __del_qgroup_relation(trans, fs_info,
1385 qgroupid,
1386 list->group->qgroupid);
1387 if (ret)
1388 goto out;
1389 }
1390
bed92eae 1391 spin_lock(&fs_info->qgroup_lock);
0b246afa 1392 del_qgroup_rb(fs_info, qgroupid);
bed92eae 1393 spin_unlock(&fs_info->qgroup_lock);
f2f6ed3d
WS
1394out:
1395 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1396 return ret;
1397}
1398
1399int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1400 struct btrfs_fs_info *fs_info, u64 qgroupid,
1401 struct btrfs_qgroup_limit *limit)
1402{
f2f6ed3d 1403 struct btrfs_root *quota_root;
bed92eae
AJ
1404 struct btrfs_qgroup *qgroup;
1405 int ret = 0;
fe759907
YD
1406 /* Sometimes we would want to clear the limit on this qgroup.
1407 * To meet this requirement, we treat the -1 as a special value
1408 * which tell kernel to clear the limit on this qgroup.
1409 */
1410 const u64 CLEAR_VALUE = -1;
bed92eae 1411
f2f6ed3d
WS
1412 mutex_lock(&fs_info->qgroup_ioctl_lock);
1413 quota_root = fs_info->quota_root;
1414 if (!quota_root) {
1415 ret = -EINVAL;
1416 goto out;
1417 }
bed92eae 1418
ddb47afa
WS
1419 qgroup = find_qgroup_rb(fs_info, qgroupid);
1420 if (!qgroup) {
1421 ret = -ENOENT;
1422 goto out;
1423 }
bed92eae 1424
58400fce 1425 spin_lock(&fs_info->qgroup_lock);
fe759907
YD
1426 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
1427 if (limit->max_rfer == CLEAR_VALUE) {
1428 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
1429 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
1430 qgroup->max_rfer = 0;
1431 } else {
1432 qgroup->max_rfer = limit->max_rfer;
1433 }
1434 }
1435 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
1436 if (limit->max_excl == CLEAR_VALUE) {
1437 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
1438 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
1439 qgroup->max_excl = 0;
1440 } else {
1441 qgroup->max_excl = limit->max_excl;
1442 }
1443 }
1444 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
1445 if (limit->rsv_rfer == CLEAR_VALUE) {
1446 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
1447 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
1448 qgroup->rsv_rfer = 0;
1449 } else {
1450 qgroup->rsv_rfer = limit->rsv_rfer;
1451 }
1452 }
1453 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
1454 if (limit->rsv_excl == CLEAR_VALUE) {
1455 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
1456 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
1457 qgroup->rsv_excl = 0;
1458 } else {
1459 qgroup->rsv_excl = limit->rsv_excl;
1460 }
1461 }
03477d94
DY
1462 qgroup->lim_flags |= limit->flags;
1463
bed92eae 1464 spin_unlock(&fs_info->qgroup_lock);
1510e71c
DY
1465
1466 ret = update_qgroup_limit_item(trans, quota_root, qgroup);
1467 if (ret) {
1468 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1469 btrfs_info(fs_info, "unable to update quota limit for %llu",
1470 qgroupid);
1471 }
1472
f2f6ed3d
WS
1473out:
1474 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1475 return ret;
1476}
1152651a 1477
50b3e040 1478int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
cb93b52c
QW
1479 struct btrfs_delayed_ref_root *delayed_refs,
1480 struct btrfs_qgroup_extent_record *record)
3368d001
QW
1481{
1482 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
1483 struct rb_node *parent_node = NULL;
1484 struct btrfs_qgroup_extent_record *entry;
1485 u64 bytenr = record->bytenr;
1486
82bd101b 1487 assert_spin_locked(&delayed_refs->lock);
50b3e040 1488 trace_btrfs_qgroup_trace_extent(fs_info, record);
82bd101b 1489
3368d001
QW
1490 while (*p) {
1491 parent_node = *p;
1492 entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
1493 node);
1494 if (bytenr < entry->bytenr)
1495 p = &(*p)->rb_left;
1496 else if (bytenr > entry->bytenr)
1497 p = &(*p)->rb_right;
1498 else
cb93b52c 1499 return 1;
3368d001
QW
1500 }
1501
1502 rb_link_node(&record->node, parent_node, p);
1503 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
cb93b52c
QW
1504 return 0;
1505}
1506
fb235dc0
QW
1507int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
1508 struct btrfs_qgroup_extent_record *qrecord)
1509{
1510 struct ulist *old_root;
1511 u64 bytenr = qrecord->bytenr;
1512 int ret;
1513
c995ab3c 1514 ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
952bd3db
NB
1515 if (ret < 0) {
1516 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1517 btrfs_warn(fs_info,
1518"error accounting new delayed refs extent (err code: %d), quota inconsistent",
1519 ret);
1520 return 0;
1521 }
fb235dc0
QW
1522
1523 /*
1524 * Here we don't need to get the lock of
1525 * trans->transaction->delayed_refs, since inserted qrecord won't
1526 * be deleted, only qrecord->node may be modified (new qrecord insert)
1527 *
1528 * So modifying qrecord->old_roots is safe here
1529 */
1530 qrecord->old_roots = old_root;
1531 return 0;
1532}
1533
50b3e040 1534int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
cb93b52c
QW
1535 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
1536 gfp_t gfp_flag)
1537{
1538 struct btrfs_qgroup_extent_record *record;
1539 struct btrfs_delayed_ref_root *delayed_refs;
1540 int ret;
1541
afcdd129
JB
1542 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
1543 || bytenr == 0 || num_bytes == 0)
cb93b52c
QW
1544 return 0;
1545 if (WARN_ON(trans == NULL))
1546 return -EINVAL;
1547 record = kmalloc(sizeof(*record), gfp_flag);
1548 if (!record)
1549 return -ENOMEM;
1550
1551 delayed_refs = &trans->transaction->delayed_refs;
1552 record->bytenr = bytenr;
1553 record->num_bytes = num_bytes;
1554 record->old_roots = NULL;
1555
1556 spin_lock(&delayed_refs->lock);
2ff7e61e 1557 ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
cb93b52c 1558 spin_unlock(&delayed_refs->lock);
fb235dc0 1559 if (ret > 0) {
cb93b52c 1560 kfree(record);
fb235dc0
QW
1561 return 0;
1562 }
1563 return btrfs_qgroup_trace_extent_post(fs_info, record);
3368d001
QW
1564}
1565
33d1f05c 1566int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
2ff7e61e 1567 struct btrfs_fs_info *fs_info,
33d1f05c
QW
1568 struct extent_buffer *eb)
1569{
1570 int nr = btrfs_header_nritems(eb);
1571 int i, extent_type, ret;
1572 struct btrfs_key key;
1573 struct btrfs_file_extent_item *fi;
1574 u64 bytenr, num_bytes;
1575
1576 /* We can be called directly from walk_up_proc() */
0b246afa 1577 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
33d1f05c
QW
1578 return 0;
1579
1580 for (i = 0; i < nr; i++) {
1581 btrfs_item_key_to_cpu(eb, &key, i);
1582
1583 if (key.type != BTRFS_EXTENT_DATA_KEY)
1584 continue;
1585
1586 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
1587 /* filter out non qgroup-accountable extents */
1588 extent_type = btrfs_file_extent_type(eb, fi);
1589
1590 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
1591 continue;
1592
1593 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1594 if (!bytenr)
1595 continue;
1596
1597 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1598
0b246afa
JM
1599 ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr,
1600 num_bytes, GFP_NOFS);
33d1f05c
QW
1601 if (ret)
1602 return ret;
1603 }
cddf3b2c 1604 cond_resched();
33d1f05c
QW
1605 return 0;
1606}
1607
1608/*
1609 * Walk up the tree from the bottom, freeing leaves and any interior
1610 * nodes which have had all slots visited. If a node (leaf or
1611 * interior) is freed, the node above it will have it's slot
1612 * incremented. The root node will never be freed.
1613 *
1614 * At the end of this function, we should have a path which has all
1615 * slots incremented to the next position for a search. If we need to
1616 * read a new node it will be NULL and the node above it will have the
1617 * correct slot selected for a later read.
1618 *
1619 * If we increment the root nodes slot counter past the number of
1620 * elements, 1 is returned to signal completion of the search.
1621 */
15b34517 1622static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
33d1f05c
QW
1623{
1624 int level = 0;
1625 int nr, slot;
1626 struct extent_buffer *eb;
1627
1628 if (root_level == 0)
1629 return 1;
1630
1631 while (level <= root_level) {
1632 eb = path->nodes[level];
1633 nr = btrfs_header_nritems(eb);
1634 path->slots[level]++;
1635 slot = path->slots[level];
1636 if (slot >= nr || level == 0) {
1637 /*
1638 * Don't free the root - we will detect this
1639 * condition after our loop and return a
1640 * positive value for caller to stop walking the tree.
1641 */
1642 if (level != root_level) {
1643 btrfs_tree_unlock_rw(eb, path->locks[level]);
1644 path->locks[level] = 0;
1645
1646 free_extent_buffer(eb);
1647 path->nodes[level] = NULL;
1648 path->slots[level] = 0;
1649 }
1650 } else {
1651 /*
1652 * We have a valid slot to walk back down
1653 * from. Stop here so caller can process these
1654 * new nodes.
1655 */
1656 break;
1657 }
1658
1659 level++;
1660 }
1661
1662 eb = path->nodes[root_level];
1663 if (path->slots[root_level] >= btrfs_header_nritems(eb))
1664 return 1;
1665
1666 return 0;
1667}
1668
1669int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
1670 struct btrfs_root *root,
1671 struct extent_buffer *root_eb,
1672 u64 root_gen, int root_level)
1673{
0b246afa 1674 struct btrfs_fs_info *fs_info = root->fs_info;
33d1f05c
QW
1675 int ret = 0;
1676 int level;
1677 struct extent_buffer *eb = root_eb;
1678 struct btrfs_path *path = NULL;
1679
b6e6bca5 1680 BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL);
33d1f05c
QW
1681 BUG_ON(root_eb == NULL);
1682
0b246afa 1683 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
33d1f05c
QW
1684 return 0;
1685
1686 if (!extent_buffer_uptodate(root_eb)) {
1687 ret = btrfs_read_buffer(root_eb, root_gen);
1688 if (ret)
1689 goto out;
1690 }
1691
1692 if (root_level == 0) {
2ff7e61e 1693 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb);
33d1f05c
QW
1694 goto out;
1695 }
1696
1697 path = btrfs_alloc_path();
1698 if (!path)
1699 return -ENOMEM;
1700
1701 /*
1702 * Walk down the tree. Missing extent blocks are filled in as
1703 * we go. Metadata is accounted every time we read a new
1704 * extent block.
1705 *
1706 * When we reach a leaf, we account for file extent items in it,
1707 * walk back up the tree (adjusting slot pointers as we go)
1708 * and restart the search process.
1709 */
1710 extent_buffer_get(root_eb); /* For path */
1711 path->nodes[root_level] = root_eb;
1712 path->slots[root_level] = 0;
1713 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
1714walk_down:
1715 level = root_level;
1716 while (level >= 0) {
1717 if (path->nodes[level] == NULL) {
1718 int parent_slot;
1719 u64 child_gen;
1720 u64 child_bytenr;
1721
1722 /*
1723 * We need to get child blockptr/gen from parent before
1724 * we can read it.
1725 */
1726 eb = path->nodes[level + 1];
1727 parent_slot = path->slots[level + 1];
1728 child_bytenr = btrfs_node_blockptr(eb, parent_slot);
1729 child_gen = btrfs_node_ptr_generation(eb, parent_slot);
1730
2ff7e61e 1731 eb = read_tree_block(fs_info, child_bytenr, child_gen);
33d1f05c
QW
1732 if (IS_ERR(eb)) {
1733 ret = PTR_ERR(eb);
1734 goto out;
1735 } else if (!extent_buffer_uptodate(eb)) {
1736 free_extent_buffer(eb);
1737 ret = -EIO;
1738 goto out;
1739 }
1740
1741 path->nodes[level] = eb;
1742 path->slots[level] = 0;
1743
1744 btrfs_tree_read_lock(eb);
1745 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1746 path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
1747
0b246afa
JM
1748 ret = btrfs_qgroup_trace_extent(trans, fs_info,
1749 child_bytenr,
1750 fs_info->nodesize,
1751 GFP_NOFS);
33d1f05c
QW
1752 if (ret)
1753 goto out;
1754 }
1755
1756 if (level == 0) {
2ff7e61e
JM
1757 ret = btrfs_qgroup_trace_leaf_items(trans,fs_info,
1758 path->nodes[level]);
33d1f05c
QW
1759 if (ret)
1760 goto out;
1761
1762 /* Nonzero return here means we completed our search */
15b34517 1763 ret = adjust_slots_upwards(path, root_level);
33d1f05c
QW
1764 if (ret)
1765 break;
1766
1767 /* Restart search with new slots */
1768 goto walk_down;
1769 }
1770
1771 level--;
1772 }
1773
1774 ret = 0;
1775out:
1776 btrfs_free_path(path);
1777
1778 return ret;
1779}
1780
d810ef2b
QW
1781#define UPDATE_NEW 0
1782#define UPDATE_OLD 1
1783/*
1784 * Walk all of the roots that points to the bytenr and adjust their refcnts.
1785 */
1786static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
1787 struct ulist *roots, struct ulist *tmp,
1788 struct ulist *qgroups, u64 seq, int update_old)
1789{
1790 struct ulist_node *unode;
1791 struct ulist_iterator uiter;
1792 struct ulist_node *tmp_unode;
1793 struct ulist_iterator tmp_uiter;
1794 struct btrfs_qgroup *qg;
1795 int ret = 0;
1796
1797 if (!roots)
1798 return 0;
1799 ULIST_ITER_INIT(&uiter);
1800 while ((unode = ulist_next(roots, &uiter))) {
1801 qg = find_qgroup_rb(fs_info, unode->val);
1802 if (!qg)
1803 continue;
1804
1805 ulist_reinit(tmp);
ef2fff64 1806 ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
d810ef2b
QW
1807 GFP_ATOMIC);
1808 if (ret < 0)
1809 return ret;
ef2fff64 1810 ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
d810ef2b
QW
1811 if (ret < 0)
1812 return ret;
1813 ULIST_ITER_INIT(&tmp_uiter);
1814 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1815 struct btrfs_qgroup_list *glist;
1816
ef2fff64 1817 qg = unode_aux_to_qgroup(tmp_unode);
d810ef2b
QW
1818 if (update_old)
1819 btrfs_qgroup_update_old_refcnt(qg, seq, 1);
1820 else
1821 btrfs_qgroup_update_new_refcnt(qg, seq, 1);
1822 list_for_each_entry(glist, &qg->groups, next_group) {
1823 ret = ulist_add(qgroups, glist->group->qgroupid,
ef2fff64 1824 qgroup_to_aux(glist->group),
d810ef2b
QW
1825 GFP_ATOMIC);
1826 if (ret < 0)
1827 return ret;
1828 ret = ulist_add(tmp, glist->group->qgroupid,
ef2fff64 1829 qgroup_to_aux(glist->group),
d810ef2b
QW
1830 GFP_ATOMIC);
1831 if (ret < 0)
1832 return ret;
1833 }
1834 }
1835 }
1836 return 0;
1837}
1838
823ae5b8
QW
1839/*
1840 * Update qgroup rfer/excl counters.
1841 * Rfer update is easy, codes can explain themselves.
e69bcee3 1842 *
823ae5b8
QW
1843 * Excl update is tricky, the update is split into 2 part.
1844 * Part 1: Possible exclusive <-> sharing detect:
1845 * | A | !A |
1846 * -------------------------------------
1847 * B | * | - |
1848 * -------------------------------------
1849 * !B | + | ** |
1850 * -------------------------------------
1851 *
1852 * Conditions:
1853 * A: cur_old_roots < nr_old_roots (not exclusive before)
1854 * !A: cur_old_roots == nr_old_roots (possible exclusive before)
1855 * B: cur_new_roots < nr_new_roots (not exclusive now)
01327610 1856 * !B: cur_new_roots == nr_new_roots (possible exclusive now)
823ae5b8
QW
1857 *
1858 * Results:
1859 * +: Possible sharing -> exclusive -: Possible exclusive -> sharing
1860 * *: Definitely not changed. **: Possible unchanged.
1861 *
1862 * For !A and !B condition, the exception is cur_old/new_roots == 0 case.
1863 *
1864 * To make the logic clear, we first use condition A and B to split
1865 * combination into 4 results.
1866 *
1867 * Then, for result "+" and "-", check old/new_roots == 0 case, as in them
1868 * only on variant maybe 0.
1869 *
1870 * Lastly, check result **, since there are 2 variants maybe 0, split them
1871 * again(2x2).
1872 * But this time we don't need to consider other things, the codes and logic
1873 * is easy to understand now.
1874 */
1875static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
1876 struct ulist *qgroups,
1877 u64 nr_old_roots,
1878 u64 nr_new_roots,
1879 u64 num_bytes, u64 seq)
1880{
1881 struct ulist_node *unode;
1882 struct ulist_iterator uiter;
1883 struct btrfs_qgroup *qg;
1884 u64 cur_new_count, cur_old_count;
1885
1886 ULIST_ITER_INIT(&uiter);
1887 while ((unode = ulist_next(qgroups, &uiter))) {
1888 bool dirty = false;
1889
ef2fff64 1890 qg = unode_aux_to_qgroup(unode);
823ae5b8
QW
1891 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
1892 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
1893
bc074524
JM
1894 trace_qgroup_update_counters(fs_info, qg->qgroupid,
1895 cur_old_count, cur_new_count);
0f5dcf8d 1896
823ae5b8
QW
1897 /* Rfer update part */
1898 if (cur_old_count == 0 && cur_new_count > 0) {
1899 qg->rfer += num_bytes;
1900 qg->rfer_cmpr += num_bytes;
1901 dirty = true;
1902 }
1903 if (cur_old_count > 0 && cur_new_count == 0) {
1904 qg->rfer -= num_bytes;
1905 qg->rfer_cmpr -= num_bytes;
1906 dirty = true;
1907 }
1908
1909 /* Excl update part */
1910 /* Exclusive/none -> shared case */
1911 if (cur_old_count == nr_old_roots &&
1912 cur_new_count < nr_new_roots) {
1913 /* Exclusive -> shared */
1914 if (cur_old_count != 0) {
1915 qg->excl -= num_bytes;
1916 qg->excl_cmpr -= num_bytes;
1917 dirty = true;
1918 }
1919 }
1920
1921 /* Shared -> exclusive/none case */
1922 if (cur_old_count < nr_old_roots &&
1923 cur_new_count == nr_new_roots) {
1924 /* Shared->exclusive */
1925 if (cur_new_count != 0) {
1926 qg->excl += num_bytes;
1927 qg->excl_cmpr += num_bytes;
1928 dirty = true;
1929 }
1930 }
1931
1932 /* Exclusive/none -> exclusive/none case */
1933 if (cur_old_count == nr_old_roots &&
1934 cur_new_count == nr_new_roots) {
1935 if (cur_old_count == 0) {
1936 /* None -> exclusive/none */
1937
1938 if (cur_new_count != 0) {
1939 /* None -> exclusive */
1940 qg->excl += num_bytes;
1941 qg->excl_cmpr += num_bytes;
1942 dirty = true;
1943 }
1944 /* None -> none, nothing changed */
1945 } else {
1946 /* Exclusive -> exclusive/none */
1947
1948 if (cur_new_count == 0) {
1949 /* Exclusive -> none */
1950 qg->excl -= num_bytes;
1951 qg->excl_cmpr -= num_bytes;
1952 dirty = true;
1953 }
1954 /* Exclusive -> exclusive, nothing changed */
1955 }
1956 }
c05f9429 1957
823ae5b8
QW
1958 if (dirty)
1959 qgroup_dirty(fs_info, qg);
1960 }
1961 return 0;
1962}
1963
5edfd9fd
QW
1964/*
1965 * Check if the @roots potentially is a list of fs tree roots
1966 *
1967 * Return 0 for definitely not a fs/subvol tree roots ulist
1968 * Return 1 for possible fs/subvol tree roots in the list (considering an empty
1969 * one as well)
1970 */
1971static int maybe_fs_roots(struct ulist *roots)
1972{
1973 struct ulist_node *unode;
1974 struct ulist_iterator uiter;
1975
1976 /* Empty one, still possible for fs roots */
1977 if (!roots || roots->nnodes == 0)
1978 return 1;
1979
1980 ULIST_ITER_INIT(&uiter);
1981 unode = ulist_next(roots, &uiter);
1982 if (!unode)
1983 return 1;
1984
1985 /*
1986 * If it contains fs tree roots, then it must belong to fs/subvol
1987 * trees.
1988 * If it contains a non-fs tree, it won't be shared with fs/subvol trees.
1989 */
1990 return is_fstree(unode->val);
1991}
1992
442244c9 1993int
550d7a2e
QW
1994btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
1995 struct btrfs_fs_info *fs_info,
1996 u64 bytenr, u64 num_bytes,
1997 struct ulist *old_roots, struct ulist *new_roots)
1998{
1999 struct ulist *qgroups = NULL;
2000 struct ulist *tmp = NULL;
2001 u64 seq;
2002 u64 nr_new_roots = 0;
2003 u64 nr_old_roots = 0;
2004 int ret = 0;
2005
81353d50
DS
2006 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
2007 return 0;
2008
5edfd9fd
QW
2009 if (new_roots) {
2010 if (!maybe_fs_roots(new_roots))
2011 goto out_free;
550d7a2e 2012 nr_new_roots = new_roots->nnodes;
5edfd9fd
QW
2013 }
2014 if (old_roots) {
2015 if (!maybe_fs_roots(old_roots))
2016 goto out_free;
550d7a2e 2017 nr_old_roots = old_roots->nnodes;
5edfd9fd
QW
2018 }
2019
2020 /* Quick exit, either not fs tree roots, or won't affect any qgroup */
2021 if (nr_old_roots == 0 && nr_new_roots == 0)
2022 goto out_free;
550d7a2e 2023
550d7a2e
QW
2024 BUG_ON(!fs_info->quota_root);
2025
bc074524
JM
2026 trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes,
2027 nr_old_roots, nr_new_roots);
0f5dcf8d 2028
550d7a2e
QW
2029 qgroups = ulist_alloc(GFP_NOFS);
2030 if (!qgroups) {
2031 ret = -ENOMEM;
2032 goto out_free;
2033 }
2034 tmp = ulist_alloc(GFP_NOFS);
2035 if (!tmp) {
2036 ret = -ENOMEM;
2037 goto out_free;
2038 }
2039
2040 mutex_lock(&fs_info->qgroup_rescan_lock);
2041 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
2042 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
2043 mutex_unlock(&fs_info->qgroup_rescan_lock);
2044 ret = 0;
2045 goto out_free;
2046 }
2047 }
2048 mutex_unlock(&fs_info->qgroup_rescan_lock);
2049
2050 spin_lock(&fs_info->qgroup_lock);
2051 seq = fs_info->qgroup_seq;
2052
2053 /* Update old refcnts using old_roots */
2054 ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
2055 UPDATE_OLD);
2056 if (ret < 0)
2057 goto out;
2058
2059 /* Update new refcnts using new_roots */
2060 ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
2061 UPDATE_NEW);
2062 if (ret < 0)
2063 goto out;
2064
2065 qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
2066 num_bytes, seq);
2067
2068 /*
2069 * Bump qgroup_seq to avoid seq overlap
2070 */
2071 fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
2072out:
2073 spin_unlock(&fs_info->qgroup_lock);
2074out_free:
2075 ulist_free(tmp);
2076 ulist_free(qgroups);
2077 ulist_free(old_roots);
2078 ulist_free(new_roots);
2079 return ret;
2080}
2081
460fb20a 2082int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
550d7a2e 2083{
460fb20a 2084 struct btrfs_fs_info *fs_info = trans->fs_info;
550d7a2e
QW
2085 struct btrfs_qgroup_extent_record *record;
2086 struct btrfs_delayed_ref_root *delayed_refs;
2087 struct ulist *new_roots = NULL;
2088 struct rb_node *node;
9086db86 2089 u64 qgroup_to_skip;
550d7a2e
QW
2090 int ret = 0;
2091
2092 delayed_refs = &trans->transaction->delayed_refs;
9086db86 2093 qgroup_to_skip = delayed_refs->qgroup_to_skip;
550d7a2e
QW
2094 while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
2095 record = rb_entry(node, struct btrfs_qgroup_extent_record,
2096 node);
2097
bc074524 2098 trace_btrfs_qgroup_account_extents(fs_info, record);
0f5dcf8d 2099
550d7a2e 2100 if (!ret) {
d1b8b94a
QW
2101 /*
2102 * Old roots should be searched when inserting qgroup
2103 * extent record
2104 */
2105 if (WARN_ON(!record->old_roots)) {
2106 /* Search commit root to find old_roots */
2107 ret = btrfs_find_all_roots(NULL, fs_info,
2108 record->bytenr, 0,
c995ab3c 2109 &record->old_roots, false);
d1b8b94a
QW
2110 if (ret < 0)
2111 goto cleanup;
2112 }
2113
550d7a2e 2114 /*
de47c9d3 2115 * Use SEQ_LAST as time_seq to do special search, which
550d7a2e
QW
2116 * doesn't lock tree or delayed_refs and search current
2117 * root. It's safe inside commit_transaction().
2118 */
2119 ret = btrfs_find_all_roots(trans, fs_info,
c995ab3c 2120 record->bytenr, SEQ_LAST, &new_roots, false);
550d7a2e
QW
2121 if (ret < 0)
2122 goto cleanup;
d1b8b94a 2123 if (qgroup_to_skip) {
9086db86 2124 ulist_del(new_roots, qgroup_to_skip, 0);
d1b8b94a
QW
2125 ulist_del(record->old_roots, qgroup_to_skip,
2126 0);
2127 }
550d7a2e
QW
2128 ret = btrfs_qgroup_account_extent(trans, fs_info,
2129 record->bytenr, record->num_bytes,
2130 record->old_roots, new_roots);
2131 record->old_roots = NULL;
2132 new_roots = NULL;
2133 }
2134cleanup:
2135 ulist_free(record->old_roots);
2136 ulist_free(new_roots);
2137 new_roots = NULL;
2138 rb_erase(node, &delayed_refs->dirty_extent_root);
2139 kfree(record);
2140
2141 }
2142 return ret;
2143}
2144
bed92eae
AJ
/*
 * Called from commit_transaction. Writes all changed qgroups to disk.
 *
 * Flushes every qgroup on fs_info->dirty_qgroups to the quota tree
 * (info and limit items), then writes the status item.  The qgroup
 * spinlock is dropped around each item update since the update helpers
 * may sleep; the dirty list is re-checked after re-taking the lock.
 * Any item-update failure marks the qgroups INCONSISTENT instead of
 * aborting the transaction.
 */
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root = fs_info->quota_root;
	int ret = 0;

	/* No quota tree means nothing to persist */
	if (!quota_root)
		return ret;

	spin_lock(&fs_info->qgroup_lock);
	while (!list_empty(&fs_info->dirty_qgroups)) {
		struct btrfs_qgroup *qgroup;
		qgroup = list_first_entry(&fs_info->dirty_qgroups,
					  struct btrfs_qgroup, dirty);
		list_del_init(&qgroup->dirty);
		spin_unlock(&fs_info->qgroup_lock);
		ret = update_qgroup_info_item(trans, quota_root, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		ret = update_qgroup_limit_item(trans, quota_root, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		spin_lock(&fs_info->qgroup_lock);
	}
	/* Mirror the runtime quota-enabled bit into the on-disk status flags */
	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
	else
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_status_item(trans, fs_info, quota_root);
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;

	return ret;
}
2186
/*
 * Copy the accounting information between qgroups. This is necessary
 * when a snapshot or a subvolume is created. Throwing an error will
 * cause a transaction abort so we take extra care here to only error
 * when a readonly fs is a reasonable outcome.
 *
 * @srcid:    source subvolume id for snapshots, 0 for plain subvolume
 *            creation
 * @objectid: id of the newly created subvolume/snapshot
 * @inherit:  optional list of qgroup ids (and ref/excl copy pairs) the
 *            new subvolume should be added to; laid out as a u64 array
 *            immediately after the struct
 */
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
			 struct btrfs_qgroup_inherit *inherit)
{
	int ret = 0;
	int i;
	u64 *i_qgroups;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_qgroup *srcgroup;
	struct btrfs_qgroup *dstgroup;
	u32 level_size = 0;
	u64 nums;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		goto out;

	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
		       2 * inherit->num_excl_copies;
		for (i = 0; i < nums; ++i) {
			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);

			/*
			 * Zero out invalid groups so we can ignore
			 * them later.
			 */
			if (!srcgroup ||
			    ((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
				*i_qgroups = 0ULL;

			++i_qgroups;
		}
	}

	/*
	 * create a tracking group for the subvol itself
	 */
	ret = add_qgroup_item(trans, quota_root, objectid);
	if (ret)
		goto out;

	if (srcid) {
		struct btrfs_root *srcroot;
		struct btrfs_key srckey;

		srckey.objectid = srcid;
		srckey.type = BTRFS_ROOT_ITEM_KEY;
		srckey.offset = (u64)-1;
		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
		if (IS_ERR(srcroot)) {
			ret = PTR_ERR(srcroot);
			goto out;
		}

		/*
		 * Snapshot shares everything except the root node, which is
		 * exclusive to each side; used below for excl accounting.
		 */
		level_size = fs_info->nodesize;
	}

	/*
	 * add qgroup to all inherited groups
	 */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
			if (*i_qgroups == 0)
				continue;
			ret = add_qgroup_relation_item(trans, quota_root,
						       objectid, *i_qgroups);
			if (ret && ret != -EEXIST)
				goto out;
			ret = add_qgroup_relation_item(trans, quota_root,
						       *i_qgroups, objectid);
			if (ret && ret != -EEXIST)
				goto out;
		}
		/* -EEXIST is tolerated above; don't leak it to the caller */
		ret = 0;
	}


	spin_lock(&fs_info->qgroup_lock);

	dstgroup = add_qgroup_rb(fs_info, objectid);
	if (IS_ERR(dstgroup)) {
		ret = PTR_ERR(dstgroup);
		goto unlock;
	}

	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
		dstgroup->lim_flags = inherit->lim.flags;
		dstgroup->max_rfer = inherit->lim.max_rfer;
		dstgroup->max_excl = inherit->lim.max_excl;
		dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
		dstgroup->rsv_excl = inherit->lim.rsv_excl;

		ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
		if (ret) {
			fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
			btrfs_info(fs_info,
				   "unable to update quota limit for %llu",
				   dstgroup->qgroupid);
			goto unlock;
		}
	}

	if (srcid) {
		srcgroup = find_qgroup_rb(fs_info, srcid);
		if (!srcgroup)
			goto unlock;

		/*
		 * We call inherit after we clone the root in order to make sure
		 * our counts don't go crazy, so at this point the only
		 * difference between the two roots should be the root node.
		 */
		dstgroup->rfer = srcgroup->rfer;
		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
		dstgroup->excl = level_size;
		dstgroup->excl_cmpr = level_size;
		srcgroup->excl = level_size;
		srcgroup->excl_cmpr = level_size;

		/* inherit the limit info */
		dstgroup->lim_flags = srcgroup->lim_flags;
		dstgroup->max_rfer = srcgroup->max_rfer;
		dstgroup->max_excl = srcgroup->max_excl;
		dstgroup->rsv_rfer = srcgroup->rsv_rfer;
		dstgroup->rsv_excl = srcgroup->rsv_excl;

		qgroup_dirty(fs_info, dstgroup);
		qgroup_dirty(fs_info, srcgroup);
	}

	if (!inherit)
		goto unlock;

	/* Wire up the in-memory relations mirroring the items added above */
	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
		if (*i_qgroups) {
			ret = add_relation_rb(fs_info, objectid, *i_qgroups);
			if (ret)
				goto unlock;
		}
		++i_qgroups;
	}

	/* Each copy entry is a (src qgroupid, dst qgroupid) pair */
	for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		if (!i_qgroups[0] || !i_qgroups[1])
			continue;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->rfer = src->rfer - level_size;
		dst->rfer_cmpr = src->rfer_cmpr - level_size;
	}
	for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		if (!i_qgroups[0] || !i_qgroups[1])
			continue;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->excl = src->excl + level_size;
		dst->excl_cmpr = src->excl_cmpr + level_size;
	}

unlock:
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
2387
003d7c59
JM
2388static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
2389{
2390 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
dba21324 2391 qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
003d7c59
JM
2392 return false;
2393
2394 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
dba21324 2395 qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
003d7c59
JM
2396 return false;
2397
2398 return true;
2399}
2400
dba21324
QW
/*
 * Reserve @num_bytes of type @type against @root's qgroup and all of its
 * ancestor qgroups.
 *
 * When @enforce is true the limits of every affected qgroup are checked
 * first; if a limit would be exceeded and there are outstanding
 * reservations, one transaction commit is attempted (deletions may free
 * space) before failing with -EDQUOT.  Enforcement is skipped for
 * CAP_SYS_RESOURCE holders when the quota-override bit is set.
 *
 * Returns 0 on success, -EDQUOT when over quota, or another negative
 * errno on failure.
 */
static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
			  enum btrfs_qgroup_rsv_type type)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;
	int retried = 0;
	struct ulist_node *unode;
	struct ulist_iterator uiter;

	/* Quota only applies to fs trees (subvolumes) */
	if (!is_fstree(ref_root))
		return 0;

	if (num_bytes == 0)
		return 0;

	if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
	    capable(CAP_SYS_RESOURCE))
		enforce = false;

retry:
	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/*
	 * in a first step, we check all affected qgroups if any limits would
	 * be exceeded
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = unode_aux_to_qgroup(unode);

		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
			/*
			 * Commit the tree and retry, since we may have
			 * deletions which would free up space.
			 */
			if (!retried && qgroup_rsv_total(qg) > 0) {
				struct btrfs_trans_handle *trans;

				spin_unlock(&fs_info->qgroup_lock);
				ret = btrfs_start_delalloc_inodes(root, 0);
				if (ret)
					return ret;
				btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
				trans = btrfs_join_transaction(root);
				if (IS_ERR(trans))
					return PTR_ERR(trans);
				ret = btrfs_commit_transaction(trans);
				if (ret)
					return ret;
				retried++;
				goto retry;
			}
			ret = -EDQUOT;
			goto out;
		}

		/* Walk up: queue all parent qgroups for the same check */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
	/*
	 * no limits exceeded, now record the reservation into all qgroups
	 */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;

		qg = unode_aux_to_qgroup(unode);

		trace_qgroup_update_reserve(fs_info, qg, num_bytes, type);
		qgroup_rsv_add(fs_info, qg, num_bytes, type);
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
	return ret;
}
2501
/*
 * Release @num_bytes of reserved space of type @type from the qgroup of
 * subvolume @ref_root and all of its ancestor qgroups.
 *
 * This is the counterpart of qgroup_reserve(); it walks the same
 * parent-group closure via fs_info->qgroup_ulist and calls
 * qgroup_rsv_release() on each member.  Silently does nothing when
 * @ref_root is not an fs tree, @num_bytes is zero, quota is not enabled,
 * or the qgroup cannot be found.
 */
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
			       u64 ref_root, u64 num_bytes,
			       enum btrfs_qgroup_rsv_type type)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret = 0;

	if (!is_fstree(ref_root))
		return;

	if (num_bytes == 0)
		return;

	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = unode_aux_to_qgroup(unode);

		trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes, type);
		qgroup_rsv_release(fs_info, qg, num_bytes, type);

		/* Propagate the release to all parent qgroups */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
}
2555
2f232036
JS
/*
 * returns < 0 on error, 0 when more leafs are to be scanned.
 * returns 1 when done.
 *
 * Processes one extent-tree leaf for the qgroup rescan: finds the next
 * leaf at fs_info->qgroup_rescan_progress, clones it so the scan can run
 * without holding the path, and accounts every EXTENT_ITEM /
 * METADATA_ITEM in it via btrfs_qgroup_account_extent() with a NULL
 * old_roots (rescan semantics).
 */
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
		   struct btrfs_trans_handle *trans)
{
	struct btrfs_key found;
	struct extent_buffer *scratch_leaf = NULL;
	struct ulist *roots = NULL;
	struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
	u64 num_bytes;
	int slot;
	int ret;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	ret = btrfs_search_slot_for_read(fs_info->extent_root,
					 &fs_info->qgroup_rescan_progress,
					 path, 1, 0);

	btrfs_debug(fs_info,
		"current progress key (%llu %u %llu), search_slot ret %d",
		fs_info->qgroup_rescan_progress.objectid,
		fs_info->qgroup_rescan_progress.type,
		fs_info->qgroup_rescan_progress.offset, ret);

	if (ret) {
		/*
		 * The rescan is about to end, we will not be scanning any
		 * further blocks. We cannot unset the RESCAN flag here, because
		 * we want to commit the transaction if everything went well.
		 * To make the live accounting work in this phase, we set our
		 * scan progress pointer such that every real extent objectid
		 * will be smaller.
		 */
		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
		btrfs_release_path(path);
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		return ret;
	}

	/* Advance progress past every item in this leaf */
	btrfs_item_key_to_cpu(path->nodes[0], &found,
			      btrfs_header_nritems(path->nodes[0]) - 1);
	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;

	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
	/* Work on a private copy so the path (and its locks) can be dropped */
	scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
	if (!scratch_leaf) {
		ret = -ENOMEM;
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		goto out;
	}
	extent_buffer_get(scratch_leaf);
	btrfs_tree_read_lock(scratch_leaf);
	btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
	slot = path->slots[0];
	btrfs_release_path(path);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
		if (found.type != BTRFS_EXTENT_ITEM_KEY &&
		    found.type != BTRFS_METADATA_ITEM_KEY)
			continue;
		/* Metadata items key on level, not length; size is one node */
		if (found.type == BTRFS_METADATA_ITEM_KEY)
			num_bytes = fs_info->nodesize;
		else
			num_bytes = found.offset;

		ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
					   &roots, false);
		if (ret < 0)
			goto out;
		/* For rescan, just pass old_roots as NULL */
		ret = btrfs_qgroup_account_extent(trans, fs_info,
				found.objectid, num_bytes, NULL, roots);
		if (ret < 0)
			goto out;
	}
out:
	if (scratch_leaf) {
		btrfs_tree_read_unlock_blocking(scratch_leaf);
		free_extent_buffer(scratch_leaf);
	}
	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);

	return ret;
}
2645
/*
 * Background worker driving the qgroup rescan: repeatedly scans one leaf
 * per transaction via qgroup_rescan_leaf() until done (err > 0), error
 * (err < 0) or the filesystem starts closing.  Afterwards it clears the
 * RESCAN flag (unless paused by unmount), adjusts the INCONSISTENT flag
 * based on the outcome, persists the status item in its own transaction,
 * and wakes anyone in btrfs_qgroup_wait_for_completion().
 */
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
						     qgroup_rescan_work);
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans = NULL;
	int err = -ENOMEM;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		goto out;

	err = 0;
	while (!err && !btrfs_fs_closing(fs_info)) {
		trans = btrfs_start_transaction(fs_info->fs_root, 0);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			break;
		}
		if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
			err = -EINTR;
		} else {
			err = qgroup_rescan_leaf(fs_info, path, trans);
		}
		/* err > 0 means the scan finished cleanly; commit the result */
		if (err > 0)
			btrfs_commit_transaction(trans);
		else
			btrfs_end_transaction(trans);
	}

out:
	btrfs_free_path(path);

	mutex_lock(&fs_info->qgroup_rescan_lock);
	/* Keep the RESCAN flag when pausing at unmount so it can resume */
	if (!btrfs_fs_closing(fs_info))
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;

	if (err > 0 &&
	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	} else if (err < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	/*
	 * only update status, since the previous part has already updated the
	 * qgroup info.
	 */
	trans = btrfs_start_transaction(fs_info->quota_root, 1);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		btrfs_err(fs_info,
			  "fail to start transaction for status update: %d",
			  err);
		goto done;
	}
	ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
	if (ret < 0) {
		err = ret;
		btrfs_err(fs_info, "fail to update qgroup status: %d", err);
	}
	btrfs_end_transaction(trans);

	if (btrfs_fs_closing(fs_info)) {
		btrfs_info(fs_info, "qgroup scan paused");
	} else if (err >= 0) {
		btrfs_info(fs_info, "qgroup scan completed%s",
			err > 0 ? " (inconsistency flag cleared)" : "");
	} else {
		btrfs_err(fs_info, "qgroup scan failed with %d", err);
	}

done:
	mutex_lock(&fs_info->qgroup_rescan_lock);
	fs_info->qgroup_rescan_running = false;
	mutex_unlock(&fs_info->qgroup_rescan_lock);
	complete_all(&fs_info->qgroup_rescan_completion);
}
2726
b382a324
JS
/*
 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
 * memory required for the rescan context.
 *
 * @progress_objectid: extent objectid to start/resume the scan from
 * @init_flags:        non-zero when starting a fresh rescan (sets the
 *                     RESCAN status flag); zero when resuming one that is
 *                     already flagged on disk (mount-time resume)
 *
 * Returns 0 on success, -EINPROGRESS when a rescan is already running,
 * -EINVAL when quota is off or no paused rescan exists to resume.
 */
static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags)
{
	int ret = 0;

	/* Resume path: there must be a paused rescan and quota must be on */
	if (!init_flags &&
	    (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
	     !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
		ret = -EINVAL;
		goto err;
	}

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);

	if (init_flags) {
		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
			ret = -EINPROGRESS;
		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
			ret = -EINVAL;

		if (ret) {
			spin_unlock(&fs_info->qgroup_lock);
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			goto err;
		}
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	memset(&fs_info->qgroup_rescan_progress, 0,
		sizeof(fs_info->qgroup_rescan_progress));
	fs_info->qgroup_rescan_progress.objectid = progress_objectid;
	init_completion(&fs_info->qgroup_rescan_completion);
	fs_info->qgroup_rescan_running = true;

	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	memset(&fs_info->qgroup_rescan_work, 0,
	       sizeof(fs_info->qgroup_rescan_work));
	btrfs_init_work(&fs_info->qgroup_rescan_work,
			btrfs_qgroup_rescan_helper,
			btrfs_qgroup_rescan_worker, NULL, NULL);

	if (ret) {
err:
		btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
		return ret;
	}

	return 0;
}
2784
/*
 * Reset the accounting of every qgroup so the rescan worker can rebuild
 * the numbers from scratch.
 */
static void
qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	spin_lock(&fs_info->qgroup_lock);
	/* clear all current qgroup tracking information */
	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		qgroup->rfer = 0;
		qgroup->rfer_cmpr = 0;
		qgroup->excl = 0;
		qgroup->excl_cmpr = 0;
	}
	spin_unlock(&fs_info->qgroup_lock);
}
2f232036 2802
b382a324
JS
/*
 * Start a full qgroup rescan: initialize the rescan state, commit the
 * current transaction so all pending delayed refs are accounted, zero
 * every qgroup's counters and queue the rescan worker.
 *
 * Returns 0 on success or a negative errno (the RESCAN flag is cleared
 * again on failure).
 */
int
btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
{
	int ret = 0;
	struct btrfs_trans_handle *trans;

	ret = qgroup_rescan_init(fs_info, 0, 1);
	if (ret)
		return ret;

	/*
	 * We have set the rescan_progress to 0, which means no more
	 * delayed refs will be accounted by btrfs_qgroup_account_ref.
	 * However, btrfs_qgroup_account_ref may be right after its call
	 * to btrfs_find_all_roots, in which case it would still do the
	 * accounting.
	 * To solve this, we're committing the transaction, which will
	 * ensure we run all delayed refs and only after that, we are
	 * going to clear all tracking information for a clean start.
	 */

	trans = btrfs_join_transaction(fs_info->fs_root);
	if (IS_ERR(trans)) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return PTR_ERR(trans);
	}
	ret = btrfs_commit_transaction(trans);
	if (ret) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return ret;
	}

	qgroup_rescan_zero_tracking(fs_info);

	btrfs_queue_work(fs_info->qgroup_rescan_workers,
			 &fs_info->qgroup_rescan_work);

	return 0;
}
57254b6e 2842
d06f23d6
JM
/*
 * Wait for a running qgroup rescan worker to finish.
 *
 * @interruptible: when true the wait may be interrupted by a signal, in
 *                 which case -ERESTARTSYS is returned.
 *
 * Returns 0 immediately when no rescan is running.
 */
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
				     bool interruptible)
{
	int running;
	int ret = 0;

	/* Snapshot the running state under both rescan locks */
	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);
	running = fs_info->qgroup_rescan_running;
	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (!running)
		return 0;

	if (interruptible)
		ret = wait_for_completion_interruptible(
					&fs_info->qgroup_rescan_completion);
	else
		wait_for_completion(&fs_info->qgroup_rescan_completion);

	return ret;
}
b382a324
JS
2866
/*
 * this is only called from open_ctree where we're still single threaded, thus
 * locking is omitted here.
 *
 * Re-queues the rescan worker when the on-disk status says a rescan was
 * interrupted (the flag survives an unmount, see the worker's pause path).
 */
void
btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
{
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
		btrfs_queue_work(fs_info->qgroup_rescan_workers,
				 &fs_info->qgroup_rescan_work);
}
52472553
QW
2878
/*
 * Reserve qgroup space for range [start, start + len).
 *
 * This function will either reserve space from related qgroups or doing
 * nothing if the range is already reserved.
 *
 * Return 0 for successful reserve
 * Return <0 for error (including -EQUOT)
 *
 * NOTE: this function may sleep for memory allocation.
 *	 if btrfs_qgroup_reserve_data() is called multiple times with
 *	 same @reserved, caller must ensure when error happens it's OK
 *	 to free *ALL* reserved space.
 */
int btrfs_qgroup_reserve_data(struct inode *inode,
			struct extent_changeset **reserved_ret, u64 start,
			u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct extent_changeset *reserved;
	u64 orig_reserved;
	u64 to_reserve;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
	    !is_fstree(root->objectid) || len == 0)
		return 0;

	/* @reserved parameter is mandatory for qgroup */
	if (WARN_ON(!reserved_ret))
		return -EINVAL;
	if (!*reserved_ret) {
		*reserved_ret = extent_changeset_alloc();
		if (!*reserved_ret)
			return -ENOMEM;
	}
	reserved = *reserved_ret;
	/* Record already reserved space */
	orig_reserved = reserved->bytes_changed;
	/*
	 * Mark the range reserved in the io_tree; @reserved accumulates the
	 * ranges actually changed so overlapping calls don't double-count.
	 */
	ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
			start + len - 1, EXTENT_QGROUP_RESERVED, reserved);

	/* Newly reserved space */
	to_reserve = reserved->bytes_changed - orig_reserved;
	trace_btrfs_qgroup_reserve_data(inode, start, len,
					to_reserve, QGROUP_RESERVE);
	if (ret < 0)
		goto cleanup;
	ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
	if (ret < 0)
		goto cleanup;

	return ret;

cleanup:
	/* cleanup *ALL* already reserved ranges */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(&reserved->range_changed, &uiter)))
		clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
				 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
	extent_changeset_release(reserved);
	return ret;
}
f695fdce 2944
bc42bda2
QW
2945/* Free ranges specified by @reserved, normally in error path */
2946static int qgroup_free_reserved_data(struct inode *inode,
2947 struct extent_changeset *reserved, u64 start, u64 len)
2948{
2949 struct btrfs_root *root = BTRFS_I(inode)->root;
2950 struct ulist_node *unode;
2951 struct ulist_iterator uiter;
2952 struct extent_changeset changeset;
2953 int freed = 0;
2954 int ret;
2955
2956 extent_changeset_init(&changeset);
2957 len = round_up(start + len, root->fs_info->sectorsize);
2958 start = round_down(start, root->fs_info->sectorsize);
2959
2960 ULIST_ITER_INIT(&uiter);
2961 while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
2962 u64 range_start = unode->val;
2963 /* unode->aux is the inclusive end */
2964 u64 range_len = unode->aux - range_start + 1;
2965 u64 free_start;
2966 u64 free_len;
2967
2968 extent_changeset_release(&changeset);
2969
2970 /* Only free range in range [start, start + len) */
2971 if (range_start >= start + len ||
2972 range_start + range_len <= start)
2973 continue;
2974 free_start = max(range_start, start);
2975 free_len = min(start + len, range_start + range_len) -
2976 free_start;
2977 /*
2978 * TODO: To also modify reserved->ranges_reserved to reflect
2979 * the modification.
2980 *
2981 * However as long as we free qgroup reserved according to
2982 * EXTENT_QGROUP_RESERVED, we won't double free.
2983 * So not need to rush.
2984 */
2985 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree,
2986 free_start, free_start + free_len - 1,
2987 EXTENT_QGROUP_RESERVED, &changeset);
2988 if (ret < 0)
2989 goto out;
2990 freed += changeset.bytes_changed;
2991 }
d4e5c920
QW
2992 btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed,
2993 BTRFS_QGROUP_RSV_DATA);
bc42bda2
QW
2994 ret = freed;
2995out:
2996 extent_changeset_release(&changeset);
2997 return ret;
2998}
2999
/*
 * Common helper for releasing/freeing reserved data space.
 *
 * Clears EXTENT_QGROUP_RESERVED from the io_tree for [start, start+len).
 * When @free is set the matching qgroup reservation is returned as well;
 * otherwise only the io_tree bits are dropped (space stays accounted
 * until the transaction commits).  When a @reserved changeset is given
 * in the free case, only the ranges recorded there are freed.
 *
 * Returns the number of bytes released/freed, or a negative errno.
 */
static int __btrfs_qgroup_release_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len,
			int free)
{
	struct extent_changeset changeset;
	int trace_op = QGROUP_RELEASE;
	int ret;

	/* In release case, we shouldn't have @reserved */
	WARN_ON(!free && reserved);
	if (free && reserved)
		return qgroup_free_reserved_data(inode, reserved, start, len);
	extent_changeset_init(&changeset);
	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
			start + len - 1, EXTENT_QGROUP_RESERVED, &changeset);
	if (ret < 0)
		goto out;

	if (free)
		trace_op = QGROUP_FREE;
	trace_btrfs_qgroup_release_data(inode, start, len,
					changeset.bytes_changed, trace_op);
	if (free)
		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
				BTRFS_I(inode)->root->objectid,
				changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
	ret = changeset.bytes_changed;
out:
	extent_changeset_release(&changeset);
	return ret;
}
3031
/*
 * Free a reserved space range from io_tree and related qgroups
 *
 * Should be called when a range of pages get invalidated before reaching disk.
 * Or for error cleanup case.
 * if @reserved is given, only reserved range in [@start, @start + @len) will
 * be freed.
 *
 * For data written to disk, use btrfs_qgroup_release_data().
 *
 * NOTE: This function may sleep for memory allocation.
 */
int btrfs_qgroup_free_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len)
{
	return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
}
3049
/*
 * Release a reserved space range from io_tree only.
 *
 * Should be called when a range of pages get written to disk and corresponding
 * FILE_EXTENT is inserted into corresponding root.
 *
 * Since new qgroup accounting framework will only update qgroup numbers at
 * commit_transaction() time, its reserved space shouldn't be freed from
 * related qgroups.
 *
 * But we should release the range from io_tree, to allow further write to be
 * COWed.
 *
 * NOTE: This function may sleep for memory allocation.
 */
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
{
	return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
}
55eeaf05 3069
733e03a0
QW
/*
 * Reserve @num_bytes of metadata space of the given rsv @type for @root.
 *
 * @num_bytes must be nodesize aligned.  On success the amount is also
 * tracked in root->qgroup_meta_rsv so it can be released in bulk later
 * (see btrfs_qgroup_free_meta_all_pertrans()).
 *
 * Returns 0 on success or a negative errno from qgroup_reserve().
 */
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
				enum btrfs_qgroup_rsv_type type, bool enforce)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid) || num_bytes == 0)
		return 0;

	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
	trace_qgroup_meta_reserve(root, (s64)num_bytes);
	ret = qgroup_reserve(root, num_bytes, enforce, type);
	if (ret < 0)
		return ret;
	atomic64_add(num_bytes, &root->qgroup_meta_rsv);
	return ret;
}
3088
/*
 * Release every outstanding metadata reservation of @root as PERTRANS
 * type in one go.
 *
 * The per-root counter is atomically swapped to zero so concurrent
 * reserve/free callers see a consistent value.
 */
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 reserved;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid))
		return;

	reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
	if (reserved == 0)
		return;
	trace_qgroup_meta_reserve(root, -(s64)reserved);
	btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved,
				  BTRFS_QGROUP_RSV_META_PERTRANS);
}
3105
733e03a0
QW
3106void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
3107 enum btrfs_qgroup_rsv_type type)
55eeaf05 3108{
0b246afa
JM
3109 struct btrfs_fs_info *fs_info = root->fs_info;
3110
3111 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
afcdd129 3112 !is_fstree(root->objectid))
55eeaf05
QW
3113 return;
3114
0b246afa 3115 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
ce0dcee6
GR
3116 WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
3117 atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
3159fe7b 3118 trace_qgroup_meta_reserve(root, -(s64)num_bytes);
733e03a0 3119 btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes, type);
55eeaf05 3120}
56fa9d07
QW
3121
3122/*
01327610 3123 * Check qgroup reserved space leaking, normally at destroy inode
56fa9d07
QW
3124 * time
3125 */
3126void btrfs_qgroup_check_reserved_leak(struct inode *inode)
3127{
3128 struct extent_changeset changeset;
3129 struct ulist_node *unode;
3130 struct ulist_iterator iter;
3131 int ret;
3132
364ecf36 3133 extent_changeset_init(&changeset);
56fa9d07 3134 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
f734c44a 3135 EXTENT_QGROUP_RESERVED, &changeset);
56fa9d07
QW
3136
3137 WARN_ON(ret < 0);
3138 if (WARN_ON(changeset.bytes_changed)) {
3139 ULIST_ITER_INIT(&iter);
53d32359 3140 while ((unode = ulist_next(&changeset.range_changed, &iter))) {
56fa9d07
QW
3141 btrfs_warn(BTRFS_I(inode)->root->fs_info,
3142 "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
3143 inode->i_ino, unode->val, unode->aux);
3144 }
0b08e1f4
DS
3145 btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
3146 BTRFS_I(inode)->root->objectid,
d4e5c920 3147 changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
0b08e1f4 3148
56fa9d07 3149 }
364ecf36 3150 extent_changeset_release(&changeset);
56fa9d07 3151}