Btrfs: put the block group cache after we commit the super
fs/btrfs/free-space-cache.c [linux-2.6-block.git]
1/*
2 * Copyright (C) 2008 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
96303081 19#include <linux/pagemap.h>
0f9dd46c 20#include <linux/sched.h>
5a0e3ad6 21#include <linux/slab.h>
96303081 22#include <linux/math64.h>
6ab60601 23#include <linux/ratelimit.h>
0f9dd46c 24#include "ctree.h"
25#include "free-space-cache.h"
26#include "transaction.h"
0af3d00b 27#include "disk-io.h"
43be2146 28#include "extent_io.h"
581bb050 29#include "inode-map.h"
fa9c0d79 30
31#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
32#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
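/*
 * Rough sizing, assuming the common 4K PAGE_CACHE_SIZE (illustrative, not
 * guaranteed on every arch): BITS_PER_BITMAP is 32768, so one page-sized
 * bitmap covers 32768 * sectorsize bytes (128MB with 4K sectors), and
 * MAX_CACHE_BYTES_PER_GIG caps in-memory tracking at 32k per 1GB of block
 * group space; recalculate_thresholds() below splits that budget between
 * extent entries and bitmaps.
 */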
0f9dd46c 33
34d52cb6 34static int link_free_space(struct btrfs_free_space_ctl *ctl,
35 struct btrfs_free_space *info);
36
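/*
 * The cache inode for a block group is found through a
 * btrfs_free_space_header item in the tree root, keyed
 * (BTRFS_FREE_SPACE_OBJECTID, 0, block group offset); the header stores
 * the key of the inode that backs the cached data.
 */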
37static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38 struct btrfs_path *path,
39 u64 offset)
40{
41 struct btrfs_key key;
42 struct btrfs_key location;
43 struct btrfs_disk_key disk_key;
44 struct btrfs_free_space_header *header;
45 struct extent_buffer *leaf;
46 struct inode *inode = NULL;
47 int ret;
48
0af3d00b 49 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
0414efae 50 key.offset = offset;
51 key.type = 0;
52
53 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54 if (ret < 0)
55 return ERR_PTR(ret);
56 if (ret > 0) {
b3b4aa74 57 btrfs_release_path(path);
58 return ERR_PTR(-ENOENT);
59 }
60
61 leaf = path->nodes[0];
62 header = btrfs_item_ptr(leaf, path->slots[0],
63 struct btrfs_free_space_header);
64 btrfs_free_space_key(leaf, header, &disk_key);
65 btrfs_disk_key_to_cpu(&location, &disk_key);
b3b4aa74 66 btrfs_release_path(path);
67
68 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
69 if (!inode)
70 return ERR_PTR(-ENOENT);
71 if (IS_ERR(inode))
72 return inode;
73 if (is_bad_inode(inode)) {
74 iput(inode);
75 return ERR_PTR(-ENOENT);
76 }
77
78 inode->i_mapping->flags &= ~__GFP_FS;
79
80 return inode;
81}
82
83struct inode *lookup_free_space_inode(struct btrfs_root *root,
84 struct btrfs_block_group_cache
85 *block_group, struct btrfs_path *path)
86{
87 struct inode *inode = NULL;
88
89 spin_lock(&block_group->lock);
90 if (block_group->inode)
91 inode = igrab(block_group->inode);
92 spin_unlock(&block_group->lock);
93 if (inode)
94 return inode;
95
96 inode = __lookup_free_space_inode(root, path,
97 block_group->key.objectid);
98 if (IS_ERR(inode))
99 return inode;
100
0af3d00b 101 spin_lock(&block_group->lock);
102 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
103 printk(KERN_INFO "Old style space inode found, converting.\n");
104 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
105 block_group->disk_cache_state = BTRFS_DC_CLEAR;
106 }
107
300e4f8a 108 if (!block_group->iref) {
109 block_group->inode = igrab(inode);
110 block_group->iref = 1;
111 }
112 spin_unlock(&block_group->lock);
113
114 return inode;
115}
116
117int __create_free_space_inode(struct btrfs_root *root,
118 struct btrfs_trans_handle *trans,
119 struct btrfs_path *path, u64 ino, u64 offset)
120{
121 struct btrfs_key key;
122 struct btrfs_disk_key disk_key;
123 struct btrfs_free_space_header *header;
124 struct btrfs_inode_item *inode_item;
125 struct extent_buffer *leaf;
126 int ret;
127
0414efae 128 ret = btrfs_insert_empty_inode(trans, root, path, ino);
129 if (ret)
130 return ret;
131
132 leaf = path->nodes[0];
133 inode_item = btrfs_item_ptr(leaf, path->slots[0],
134 struct btrfs_inode_item);
135 btrfs_item_key(leaf, &disk_key, path->slots[0]);
136 memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
137 sizeof(*inode_item));
138 btrfs_set_inode_generation(leaf, inode_item, trans->transid);
139 btrfs_set_inode_size(leaf, inode_item, 0);
140 btrfs_set_inode_nbytes(leaf, inode_item, 0);
141 btrfs_set_inode_uid(leaf, inode_item, 0);
142 btrfs_set_inode_gid(leaf, inode_item, 0);
143 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
144 btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
2f356126 145 BTRFS_INODE_PREALLOC);
146 btrfs_set_inode_nlink(leaf, inode_item, 1);
147 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
0414efae 148 btrfs_set_inode_block_group(leaf, inode_item, offset);
0af3d00b 149 btrfs_mark_buffer_dirty(leaf);
b3b4aa74 150 btrfs_release_path(path);
151
152 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
0414efae 153 key.offset = offset;
154 key.type = 0;
155
156 ret = btrfs_insert_empty_item(trans, root, path, &key,
157 sizeof(struct btrfs_free_space_header));
158 if (ret < 0) {
b3b4aa74 159 btrfs_release_path(path);
160 return ret;
161 }
162 leaf = path->nodes[0];
163 header = btrfs_item_ptr(leaf, path->slots[0],
164 struct btrfs_free_space_header);
165 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
166 btrfs_set_free_space_key(leaf, header, &disk_key);
167 btrfs_mark_buffer_dirty(leaf);
b3b4aa74 168 btrfs_release_path(path);
169
170 return 0;
171}
172
173int create_free_space_inode(struct btrfs_root *root,
174 struct btrfs_trans_handle *trans,
175 struct btrfs_block_group_cache *block_group,
176 struct btrfs_path *path)
177{
178 int ret;
179 u64 ino;
180
181 ret = btrfs_find_free_objectid(root, &ino);
182 if (ret < 0)
183 return ret;
184
185 return __create_free_space_inode(root, trans, path, ino,
186 block_group->key.objectid);
187}
188
189int btrfs_truncate_free_space_cache(struct btrfs_root *root,
190 struct btrfs_trans_handle *trans,
191 struct btrfs_path *path,
192 struct inode *inode)
193{
65450aa6 194 struct btrfs_block_rsv *rsv;
195 loff_t oldsize;
196 int ret = 0;
197
65450aa6 198 rsv = trans->block_rsv;
199 trans->block_rsv = root->orphan_block_rsv;
200 ret = btrfs_block_rsv_check(trans, root,
201 root->orphan_block_rsv,
482e6dc5 202 0, 5, 0);
203 if (ret)
204 return ret;
205
206 oldsize = i_size_read(inode);
207 btrfs_i_size_write(inode, 0);
208 truncate_pagecache(inode, oldsize, 0);
209
210 /*
211 * We don't need an orphan item because truncating the free space cache
212 * will never be split across transactions.
213 */
214 ret = btrfs_truncate_inode_items(trans, root, inode,
215 0, BTRFS_EXTENT_DATA_KEY);
216
217 trans->block_rsv = rsv;
218 if (ret) {
219 WARN_ON(1);
220 return ret;
221 }
222
223 ret = btrfs_update_inode(trans, root, inode);
224 return ret;
225}
226
227static int readahead_cache(struct inode *inode)
228{
229 struct file_ra_state *ra;
230 unsigned long last_index;
231
232 ra = kzalloc(sizeof(*ra), GFP_NOFS);
233 if (!ra)
234 return -ENOMEM;
235
236 file_ra_state_init(ra, inode->i_mapping);
237 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
238
239 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
240
241 kfree(ra);
242
243 return 0;
244}
245
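/*
 * On-disk format parsed below: page 0 starts with a u64 crc placeholder
 * followed by a u64 generation, then btrfs_free_space_entry records are
 * packed into each page; once all entries have been read, every bitmap
 * entry owns one full page of bitmap data, in the order the bitmap
 * entries appeared.
 */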
246int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
247 struct btrfs_free_space_ctl *ctl,
248 struct btrfs_path *path, u64 offset)
9d66e233 249{
250 struct btrfs_free_space_header *header;
251 struct extent_buffer *leaf;
252 struct page *page;
253 struct btrfs_key key;
254 struct list_head bitmaps;
255 u64 num_entries;
256 u64 num_bitmaps;
257 u64 generation;
9d66e233 258 pgoff_t index = 0;
f6a39829 259 int ret = 0;
260
261 INIT_LIST_HEAD(&bitmaps);
262
9d66e233 263 /* Nothing in the space cache, goodbye */
0414efae 264 if (!i_size_read(inode))
9d66e233 265 goto out;
266
267 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
0414efae 268 key.offset = offset;
269 key.type = 0;
270
271 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
272 if (ret < 0)
273 goto out;
274 else if (ret > 0) {
945d8962 275 btrfs_release_path(path);
0414efae 276 ret = 0;
277 goto out;
278 }
279
280 ret = -1;
281
282 leaf = path->nodes[0];
283 header = btrfs_item_ptr(leaf, path->slots[0],
284 struct btrfs_free_space_header);
285 num_entries = btrfs_free_space_entries(leaf, header);
286 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
287 generation = btrfs_free_space_generation(leaf, header);
945d8962 288 btrfs_release_path(path);
289
290 if (BTRFS_I(inode)->generation != generation) {
291 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
0414efae 292 " not match free space cache generation (%llu)\n",
9d66e233 293 (unsigned long long)BTRFS_I(inode)->generation,
294 (unsigned long long)generation);
295 goto out;
296 }
297
298 if (!num_entries)
299 goto out;
300
9d66e233 301 ret = readahead_cache(inode);
0414efae 302 if (ret)
9d66e233 303 goto out;
304
305 while (1) {
306 struct btrfs_free_space_entry *entry;
307 struct btrfs_free_space *e;
308 void *addr;
309 unsigned long offset = 0;
310 int need_loop = 0;
311
312 if (!num_entries && !num_bitmaps)
313 break;
314
a94733d0 315 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
0414efae 316 if (!page)
9d66e233 317 goto free_cache;
318
319 if (!PageUptodate(page)) {
320 btrfs_readpage(NULL, page);
321 lock_page(page);
322 if (!PageUptodate(page)) {
323 unlock_page(page);
324 page_cache_release(page);
325 printk(KERN_ERR "btrfs: error reading free "
0414efae 326 "space cache\n");
327 goto free_cache;
328 }
329 }
330 addr = kmap(page);
331
332 if (index == 0) {
333 u64 *gen;
334
335 /*
336 * We put a bogus crc in the front of the first page in
337 * case old kernels try to mount a fs with the new
338 * format to make sure they discard the cache.
339 */
340 addr += sizeof(u64);
341 offset += sizeof(u64);
342
343 gen = addr;
9d66e233 344 if (*gen != BTRFS_I(inode)->generation) {
345 printk_ratelimited(KERN_ERR "btrfs: space cache"
346 " generation (%llu) does not match "
347 "inode (%llu)\n",
348 (unsigned long long)*gen,
349 (unsigned long long)
350 BTRFS_I(inode)->generation);
351 kunmap(page);
352 unlock_page(page);
353 page_cache_release(page);
354 goto free_cache;
355 }
356 addr += sizeof(u64);
357 offset += sizeof(u64);
9d66e233 358 }
2f356126 359 entry = addr;
360
361 while (1) {
362 if (!num_entries)
363 break;
364
365 need_loop = 1;
366 e = kmem_cache_zalloc(btrfs_free_space_cachep,
367 GFP_NOFS);
368 if (!e) {
369 kunmap(page);
370 unlock_page(page);
371 page_cache_release(page);
372 goto free_cache;
373 }
374
375 e->offset = le64_to_cpu(entry->offset);
376 e->bytes = le64_to_cpu(entry->bytes);
377 if (!e->bytes) {
378 kunmap(page);
dc89e982 379 kmem_cache_free(btrfs_free_space_cachep, e);
380 unlock_page(page);
381 page_cache_release(page);
382 goto free_cache;
383 }
384
385 if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
386 spin_lock(&ctl->tree_lock);
387 ret = link_free_space(ctl, e);
388 spin_unlock(&ctl->tree_lock);
389 if (ret) {
390 printk(KERN_ERR "Duplicate entries in "
391 "free space cache, dumping\n");
392 kunmap(page);
393 unlock_page(page);
394 page_cache_release(page);
395 goto free_cache;
396 }
397 } else {
398 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
399 if (!e->bitmap) {
400 kunmap(page);
401 kmem_cache_free(
402 btrfs_free_space_cachep, e);
403 unlock_page(page);
404 page_cache_release(page);
405 goto free_cache;
406 }
34d52cb6 407 spin_lock(&ctl->tree_lock);
f6a39829 408 ret = link_free_space(ctl, e);
409 ctl->total_bitmaps++;
410 ctl->op->recalc_thresholds(ctl);
411 spin_unlock(&ctl->tree_lock);
412 if (ret) {
413 printk(KERN_ERR "Duplicate entries in "
414 "free space cache, dumping\n");
415 kunmap(page);
416 unlock_page(page);
417 page_cache_release(page);
418 goto free_cache;
419 }
f6a39829 420 list_add_tail(&e->list, &bitmaps);
421 }
422
423 num_entries--;
424 offset += sizeof(struct btrfs_free_space_entry);
425 if (offset + sizeof(struct btrfs_free_space_entry) >=
426 PAGE_CACHE_SIZE)
427 break;
428 entry++;
429 }
430
431 /*
432 * We read an entry out of this page, we need to move on to the
433 * next page.
434 */
435 if (need_loop) {
436 kunmap(page);
437 goto next;
438 }
439
440 /*
441 * We add the bitmaps at the end of the entries in order that
442 * the bitmap entries are added to the cache.
443 */
444 e = list_entry(bitmaps.next, struct btrfs_free_space, list);
445 list_del_init(&e->list);
446 memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
447 kunmap(page);
448 num_bitmaps--;
449next:
450 unlock_page(page);
451 page_cache_release(page);
452 index++;
453 }
454
455 ret = 1;
456out:
9d66e233 457 return ret;
9d66e233 458free_cache:
0414efae 459 __btrfs_remove_free_space_cache(ctl);
460 goto out;
461}
462
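/*
 * The loader below only trusts the on-disk cache if the inode generation
 * matches the header and the loaded free space adds up to
 * key.offset - used - bytes_super; on any mismatch the cache is dumped
 * and disk_cache_state is set back to BTRFS_DC_CLEAR so it gets rebuilt.
 */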
463int load_free_space_cache(struct btrfs_fs_info *fs_info,
464 struct btrfs_block_group_cache *block_group)
0cb59c99 465{
34d52cb6 466 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
467 struct btrfs_root *root = fs_info->tree_root;
468 struct inode *inode;
469 struct btrfs_path *path;
470 int ret;
471 bool matched;
472 u64 used = btrfs_block_group_used(&block_group->item);
473
474 /*
475 * If we're unmounting then just return, since this does a search on the
476 * normal root and not the commit root and we could deadlock.
477 */
7841cb28 478 if (btrfs_fs_closing(fs_info))
479 return 0;
480
481 /*
482 * If this block group has been marked to be cleared for one reason or
483 * another then we can't trust the on disk cache, so just return.
484 */
9d66e233 485 spin_lock(&block_group->lock);
486 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
487 spin_unlock(&block_group->lock);
488 return 0;
489 }
9d66e233 490 spin_unlock(&block_group->lock);
491
492 path = btrfs_alloc_path();
493 if (!path)
494 return 0;
495
496 inode = lookup_free_space_inode(root, block_group, path);
497 if (IS_ERR(inode)) {
498 btrfs_free_path(path);
499 return 0;
500 }
501
502 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
503 path, block_group->key.objectid);
504 btrfs_free_path(path);
505 if (ret <= 0)
506 goto out;
507
508 spin_lock(&ctl->tree_lock);
509 matched = (ctl->free_space == (block_group->key.offset - used -
510 block_group->bytes_super));
511 spin_unlock(&ctl->tree_lock);
512
513 if (!matched) {
514 __btrfs_remove_free_space_cache(ctl);
 515 printk(KERN_ERR "block group %llu has a wrong amount of free "
516 "space\n", block_group->key.objectid);
517 ret = -1;
518 }
519out:
520 if (ret < 0) {
521 /* This cache is bogus, make sure it gets cleared */
522 spin_lock(&block_group->lock);
523 block_group->disk_cache_state = BTRFS_DC_CLEAR;
524 spin_unlock(&block_group->lock);
82d5902d 525 ret = 0;
526
527 printk(KERN_ERR "btrfs: failed to load free space cache "
528 "for block group %llu\n", block_group->key.objectid);
529 }
530
531 iput(inode);
532 return ret;
533}
534
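/*
 * Write-out below mirrors the loader: lock every cache page, emit the
 * extent and bitmap entries (plus any still-pinned extents inside this
 * block group), append one page per bitmap, zero the remaining pages,
 * dirty everything and finally update the free space header item with
 * the entry counts and the transaction generation.
 */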
535int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
536 struct btrfs_free_space_ctl *ctl,
537 struct btrfs_block_group_cache *block_group,
538 struct btrfs_trans_handle *trans,
539 struct btrfs_path *path, u64 offset)
540{
541 struct btrfs_free_space_header *header;
542 struct extent_buffer *leaf;
543 struct rb_node *node;
544 struct list_head *pos, *n;
be1a12a0 545 struct page **pages;
546 struct page *page;
547 struct extent_state *cached_state = NULL;
548 struct btrfs_free_cluster *cluster = NULL;
549 struct extent_io_tree *unpin = NULL;
550 struct list_head bitmap_list;
551 struct btrfs_key key;
43be2146 552 u64 start, end, len;
0cb59c99 553 u64 bytes = 0;
2f356126 554 u32 crc = ~(u32)0;
be1a12a0 555 int index = 0, num_pages = 0;
556 int entries = 0;
557 int bitmaps = 0;
0414efae 558 int ret = -1;
43be2146 559 bool next_page = false;
be1a12a0 560 bool out_of_space = false;
0cb59c99 561
562 INIT_LIST_HEAD(&bitmap_list);
563
34d52cb6 564 node = rb_first(&ctl->free_space_offset);
0414efae 565 if (!node)
566 return 0;
567
568 if (!i_size_read(inode))
569 return -1;
2b20982e 570
571 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
572 PAGE_CACHE_SHIFT;
211f96c2 573
574 filemap_write_and_wait(inode->i_mapping);
575 btrfs_wait_ordered_range(inode, inode->i_size &
576 ~(root->sectorsize - 1), (u64)-1);
577
be1a12a0 578 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
2f356126 579 if (!pages)
0414efae 580 return -1;
be1a12a0 581
43be2146 582 /* Get the cluster for this block_group if it exists */
0414efae 583 if (block_group && !list_empty(&block_group->cluster_list))
584 cluster = list_entry(block_group->cluster_list.next,
585 struct btrfs_free_cluster,
586 block_group_list);
587
588 /*
589 * We shouldn't have switched the pinned extents yet so this is the
590 * right one
591 */
592 unpin = root->fs_info->pinned_extents;
593
594 /*
595 * Lock all pages first so we can lock the extent safely.
596 *
597 * NOTE: Because we hold the ref the entire time we're going to write to
598 * the page find_get_page should never fail, so we don't do a check
599 * after find_get_page at this point. Just putting this here so people
600 * know and don't freak out.
601 */
be1a12a0 602 while (index < num_pages) {
a94733d0 603 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
0cb59c99 604 if (!page) {
be1a12a0 605 int i;
0cb59c99 606
607 for (i = 0; i < num_pages; i++) {
608 unlock_page(pages[i]);
609 page_cache_release(pages[i]);
0cb59c99 610 }
2f356126 611 goto out;
0cb59c99 612 }
be1a12a0 613 pages[index] = page;
614 index++;
615 }
616
617 index = 0;
618 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
619 0, &cached_state, GFP_NOFS);
620
621 /*
622 * When searching for pinned extents, we need to start at our start
623 * offset.
624 */
625 if (block_group)
626 start = block_group->key.objectid;
43be2146 627
628 /* Write out the extent entries */
629 do {
630 struct btrfs_free_space_entry *entry;
2f356126 631 void *addr, *orig;
0cb59c99 632 unsigned long offset = 0;
0cb59c99 633
634 next_page = false;
635
636 if (index >= num_pages) {
637 out_of_space = true;
638 break;
639 }
640
641 page = pages[index];
0cb59c99 642
643 orig = addr = kmap(page);
644 if (index == 0) {
645 u64 *gen;
0cb59c99 646
647 /*
648 * We're going to put in a bogus crc for this page to
649 * make sure that old kernels who aren't aware of this
650 * format will be sure to discard the cache.
651 */
652 addr += sizeof(u64);
653 offset += sizeof(u64);
654
655 gen = addr;
656 *gen = trans->transid;
657 addr += sizeof(u64);
658 offset += sizeof(u64);
659 }
660 entry = addr;
661
662 memset(addr, 0, PAGE_CACHE_SIZE - offset);
43be2146 663 while (node && !next_page) {
664 struct btrfs_free_space *e;
665
666 e = rb_entry(node, struct btrfs_free_space, offset_index);
667 entries++;
668
669 entry->offset = cpu_to_le64(e->offset);
670 entry->bytes = cpu_to_le64(e->bytes);
671 if (e->bitmap) {
672 entry->type = BTRFS_FREE_SPACE_BITMAP;
673 list_add_tail(&e->list, &bitmap_list);
674 bitmaps++;
675 } else {
676 entry->type = BTRFS_FREE_SPACE_EXTENT;
677 }
678 node = rb_next(node);
679 if (!node && cluster) {
680 node = rb_first(&cluster->root);
681 cluster = NULL;
682 }
683 offset += sizeof(struct btrfs_free_space_entry);
684 if (offset + sizeof(struct btrfs_free_space_entry) >=
685 PAGE_CACHE_SIZE)
686 next_page = true;
687 entry++;
688 }
689
690 /*
691 * We want to add any pinned extents to our free space cache
692 * so we don't leak the space
693 */
694 while (block_group && !next_page &&
695 (start < block_group->key.objectid +
696 block_group->key.offset)) {
697 ret = find_first_extent_bit(unpin, start, &start, &end,
698 EXTENT_DIRTY);
699 if (ret) {
700 ret = 0;
701 break;
702 }
703
704 /* This pinned extent is out of our range */
705 if (start >= block_group->key.objectid +
706 block_group->key.offset)
0cb59c99 707 break;
708
709 len = block_group->key.objectid +
710 block_group->key.offset - start;
711 len = min(len, end + 1 - start);
712
713 entries++;
714 entry->offset = cpu_to_le64(start);
715 entry->bytes = cpu_to_le64(len);
716 entry->type = BTRFS_FREE_SPACE_EXTENT;
717
718 start = end + 1;
719 offset += sizeof(struct btrfs_free_space_entry);
720 if (offset + sizeof(struct btrfs_free_space_entry) >=
721 PAGE_CACHE_SIZE)
722 next_page = true;
723 entry++;
724 }
0cb59c99 725
726 /* Generate bogus crc value */
727 if (index == 0) {
728 u32 *tmp;
729 crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
730 PAGE_CACHE_SIZE - sizeof(u64));
731 btrfs_csum_final(crc, (char *)&crc);
732 crc++;
733 tmp = orig;
734 *tmp = crc;
735 }
736
737 kunmap(page);
738
739 bytes += PAGE_CACHE_SIZE;
740
0cb59c99 741 index++;
43be2146 742 } while (node || next_page);
743
744 /* Write out the bitmaps */
745 list_for_each_safe(pos, n, &bitmap_list) {
746 void *addr;
747 struct btrfs_free_space *entry =
748 list_entry(pos, struct btrfs_free_space, list);
749
750 if (index >= num_pages) {
751 out_of_space = true;
752 break;
753 }
f65647c2 754 page = pages[index];
755
756 addr = kmap(page);
757 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
0cb59c99 758 kunmap(page);
759 bytes += PAGE_CACHE_SIZE;
760
761 list_del_init(&entry->list);
762 index++;
763 }
764
765 if (out_of_space) {
766 btrfs_drop_pages(pages, num_pages);
767 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
768 i_size_read(inode) - 1, &cached_state,
769 GFP_NOFS);
770 ret = 0;
2f356126 771 goto out;
772 }
773
0cb59c99 774 /* Zero out the rest of the pages just to make sure */
be1a12a0 775 while (index < num_pages) {
776 void *addr;
777
be1a12a0 778 page = pages[index];
779 addr = kmap(page);
780 memset(addr, 0, PAGE_CACHE_SIZE);
781 kunmap(page);
782 bytes += PAGE_CACHE_SIZE;
783 index++;
784 }
785
786 ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
787 bytes, &cached_state);
788 btrfs_drop_pages(pages, num_pages);
789 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
790 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
791
792 if (ret) {
793 ret = 0;
2f356126 794 goto out;
795 }
796
797 BTRFS_I(inode)->generation = trans->transid;
798
799 filemap_write_and_wait(inode->i_mapping);
800
801 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
0414efae 802 key.offset = offset;
803 key.type = 0;
804
a9b5fcdd 805 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
0cb59c99 806 if (ret < 0) {
0414efae 807 ret = -1;
808 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
809 EXTENT_DIRTY | EXTENT_DELALLOC |
810 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
2f356126 811 goto out;
812 }
813 leaf = path->nodes[0];
814 if (ret > 0) {
815 struct btrfs_key found_key;
816 BUG_ON(!path->slots[0]);
817 path->slots[0]--;
818 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
819 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
820 found_key.offset != offset) {
821 ret = -1;
822 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
823 EXTENT_DIRTY | EXTENT_DELALLOC |
824 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
825 GFP_NOFS);
b3b4aa74 826 btrfs_release_path(path);
2f356126 827 goto out;
828 }
829 }
830 header = btrfs_item_ptr(leaf, path->slots[0],
831 struct btrfs_free_space_header);
832 btrfs_set_free_space_entries(leaf, header, entries);
833 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
834 btrfs_set_free_space_generation(leaf, header, trans->transid);
835 btrfs_mark_buffer_dirty(leaf);
b3b4aa74 836 btrfs_release_path(path);
837
838 ret = 1;
839
2f356126 840out:
211f96c2 841 kfree(pages);
0414efae 842 if (ret != 1) {
0cb59c99 843 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
844 BTRFS_I(inode)->generation = 0;
845 }
0cb59c99 846 btrfs_update_inode(trans, root, inode);
847 return ret;
848}
849
850int btrfs_write_out_cache(struct btrfs_root *root,
851 struct btrfs_trans_handle *trans,
852 struct btrfs_block_group_cache *block_group,
853 struct btrfs_path *path)
854{
855 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
856 struct inode *inode;
857 int ret = 0;
858
859 root = root->fs_info->tree_root;
860
861 spin_lock(&block_group->lock);
862 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
863 spin_unlock(&block_group->lock);
864 return 0;
865 }
866 spin_unlock(&block_group->lock);
867
868 inode = lookup_free_space_inode(root, block_group, path);
869 if (IS_ERR(inode))
870 return 0;
871
872 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
873 path, block_group->key.objectid);
874 if (ret < 0) {
875 spin_lock(&block_group->lock);
876 block_group->disk_cache_state = BTRFS_DC_ERROR;
877 spin_unlock(&block_group->lock);
82d5902d 878 ret = 0;
879
 880 printk(KERN_ERR "btrfs: failed to write free space cache "
881 "for block group %llu\n", block_group->key.objectid);
882 }
883
884 iput(inode);
885 return ret;
886}
887
34d52cb6 888static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
96303081 889 u64 offset)
0f9dd46c 890{
891 BUG_ON(offset < bitmap_start);
892 offset -= bitmap_start;
34d52cb6 893 return (unsigned long)(div_u64(offset, unit));
96303081 894}
0f9dd46c 895
34d52cb6 896static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
96303081 897{
34d52cb6 898 return (unsigned long)(div_u64(bytes, unit));
96303081 899}
0f9dd46c 900
34d52cb6 901static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
902 u64 offset)
903{
904 u64 bitmap_start;
905 u64 bytes_per_bitmap;
0f9dd46c 906
907 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
908 bitmap_start = offset - ctl->start;
909 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
910 bitmap_start *= bytes_per_bitmap;
34d52cb6 911 bitmap_start += ctl->start;
0f9dd46c 912
96303081 913 return bitmap_start;
914}
915
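/*
 * Worked example for the helpers above (illustrative, assuming 4K pages,
 * unit == sectorsize == 4096 and ctl->start == 0): one bitmap covers
 * 32768 * 4096 bytes, so for offset 1G + 10 * 4K offset_to_bitmap()
 * rounds down to 1G, offset_to_bit(1G, 4096, 1G + 10 * 4K) is 10, and
 * bytes_to_bits(16K, 4096) is 4.
 */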
916static int tree_insert_offset(struct rb_root *root, u64 offset,
917 struct rb_node *node, int bitmap)
918{
919 struct rb_node **p = &root->rb_node;
920 struct rb_node *parent = NULL;
921 struct btrfs_free_space *info;
922
923 while (*p) {
924 parent = *p;
96303081 925 info = rb_entry(parent, struct btrfs_free_space, offset_index);
0f9dd46c 926
96303081 927 if (offset < info->offset) {
0f9dd46c 928 p = &(*p)->rb_left;
96303081 929 } else if (offset > info->offset) {
0f9dd46c 930 p = &(*p)->rb_right;
931 } else {
932 /*
933 * we could have a bitmap entry and an extent entry
934 * share the same offset. If this is the case, we want
935 * the extent entry to always be found first if we do a
936 * linear search through the tree, since we want to have
937 * the quickest allocation time, and allocating from an
938 * extent is faster than allocating from a bitmap. So
939 * if we're inserting a bitmap and we find an entry at
940 * this offset, we want to go right, or after this entry
941 * logically. If we are inserting an extent and we've
942 * found a bitmap, we want to go left, or before
943 * logically.
944 */
945 if (bitmap) {
946 if (info->bitmap) {
947 WARN_ON_ONCE(1);
948 return -EEXIST;
949 }
950 p = &(*p)->rb_right;
951 } else {
952 if (!info->bitmap) {
953 WARN_ON_ONCE(1);
954 return -EEXIST;
955 }
956 p = &(*p)->rb_left;
957 }
958 }
959 }
960
961 rb_link_node(node, parent, p);
962 rb_insert_color(node, root);
963
964 return 0;
965}
966
967/*
968 * searches the tree for the given offset.
969 *
970 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 971 * want a section that is at least 'bytes' in size and comes at or after the given
972 * offset.
0f9dd46c 973 */
96303081 974static struct btrfs_free_space *
34d52cb6 975tree_search_offset(struct btrfs_free_space_ctl *ctl,
96303081 976 u64 offset, int bitmap_only, int fuzzy)
0f9dd46c 977{
34d52cb6 978 struct rb_node *n = ctl->free_space_offset.rb_node;
979 struct btrfs_free_space *entry, *prev = NULL;
980
981 /* find entry that is closest to the 'offset' */
982 while (1) {
983 if (!n) {
984 entry = NULL;
985 break;
986 }
0f9dd46c 987
0f9dd46c 988 entry = rb_entry(n, struct btrfs_free_space, offset_index);
96303081 989 prev = entry;
0f9dd46c 990
96303081 991 if (offset < entry->offset)
0f9dd46c 992 n = n->rb_left;
96303081 993 else if (offset > entry->offset)
0f9dd46c 994 n = n->rb_right;
96303081 995 else
0f9dd46c 996 break;
997 }
998
999 if (bitmap_only) {
1000 if (!entry)
1001 return NULL;
1002 if (entry->bitmap)
1003 return entry;
0f9dd46c 1004
1005 /*
1006 * bitmap entry and extent entry may share same offset,
1007 * in that case, bitmap entry comes after extent entry.
1008 */
1009 n = rb_next(n);
1010 if (!n)
1011 return NULL;
1012 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1013 if (entry->offset != offset)
1014 return NULL;
0f9dd46c 1015
1016 WARN_ON(!entry->bitmap);
1017 return entry;
1018 } else if (entry) {
1019 if (entry->bitmap) {
0f9dd46c 1020 /*
1021 * if previous extent entry covers the offset,
1022 * we should return it instead of the bitmap entry
0f9dd46c 1023 */
1024 n = &entry->offset_index;
1025 while (1) {
1026 n = rb_prev(n);
1027 if (!n)
1028 break;
1029 prev = rb_entry(n, struct btrfs_free_space,
1030 offset_index);
1031 if (!prev->bitmap) {
1032 if (prev->offset + prev->bytes > offset)
1033 entry = prev;
1034 break;
1035 }
0f9dd46c 1036 }
1037 }
1038 return entry;
1039 }
1040
1041 if (!prev)
1042 return NULL;
1043
1044 /* find last entry before the 'offset' */
1045 entry = prev;
1046 if (entry->offset > offset) {
1047 n = rb_prev(&entry->offset_index);
1048 if (n) {
1049 entry = rb_entry(n, struct btrfs_free_space,
1050 offset_index);
1051 BUG_ON(entry->offset > offset);
0f9dd46c 1052 } else {
1053 if (fuzzy)
1054 return entry;
1055 else
1056 return NULL;
1057 }
1058 }
1059
1060 if (entry->bitmap) {
1061 n = &entry->offset_index;
1062 while (1) {
1063 n = rb_prev(n);
1064 if (!n)
1065 break;
1066 prev = rb_entry(n, struct btrfs_free_space,
1067 offset_index);
1068 if (!prev->bitmap) {
1069 if (prev->offset + prev->bytes > offset)
1070 return prev;
1071 break;
1072 }
1073 }
34d52cb6 1074 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1075 return entry;
1076 } else if (entry->offset + entry->bytes > offset)
1077 return entry;
1078
1079 if (!fuzzy)
1080 return NULL;
1081
1082 while (1) {
1083 if (entry->bitmap) {
1084 if (entry->offset + BITS_PER_BITMAP *
34d52cb6 1085 ctl->unit > offset)
1086 break;
1087 } else {
1088 if (entry->offset + entry->bytes > offset)
1089 break;
1090 }
1091
1092 n = rb_next(&entry->offset_index);
1093 if (!n)
1094 return NULL;
1095 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1096 }
1097 return entry;
1098}
1099
f333adb5 1100static inline void
34d52cb6 1101__unlink_free_space(struct btrfs_free_space_ctl *ctl,
f333adb5 1102 struct btrfs_free_space *info)
0f9dd46c 1103{
1104 rb_erase(&info->offset_index, &ctl->free_space_offset);
1105 ctl->free_extents--;
1106}
1107
34d52cb6 1108static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1109 struct btrfs_free_space *info)
1110{
1111 __unlink_free_space(ctl, info);
1112 ctl->free_space -= info->bytes;
1113}
1114
34d52cb6 1115static int link_free_space(struct btrfs_free_space_ctl *ctl,
1116 struct btrfs_free_space *info)
1117{
1118 int ret = 0;
1119
96303081 1120 BUG_ON(!info->bitmap && !info->bytes);
34d52cb6 1121 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
96303081 1122 &info->offset_index, (info->bitmap != NULL));
1123 if (ret)
1124 return ret;
1125
1126 ctl->free_space += info->bytes;
1127 ctl->free_extents++;
1128 return ret;
1129}
1130
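/*
 * Example of the threshold maths below (illustrative, assuming 4K
 * pages): a 1GB block group gets max_bytes = 32k; with one bitmap in
 * use, bitmap_bytes = (1 + 1) * 4k = 8k, extent_bytes = min(32k - 8k,
 * 32k / 2) = 16k, and extents_thresh ends up at
 * 16k / sizeof(struct btrfs_free_space).
 */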
34d52cb6 1131static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
96303081 1132{
34d52cb6 1133 struct btrfs_block_group_cache *block_group = ctl->private;
1134 u64 max_bytes;
1135 u64 bitmap_bytes;
1136 u64 extent_bytes;
8eb2d829 1137 u64 size = block_group->key.offset;
1138 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1139 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1140
1141 BUG_ON(ctl->total_bitmaps > max_bitmaps);
1142
1143 /*
1144 * The goal is to keep the total amount of memory used per 1gb of space
1145 * at or below 32k, so we need to adjust how much memory we allow to be
1146 * used by extent based free space tracking
1147 */
1148 if (size < 1024 * 1024 * 1024)
1149 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1150 else
1151 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1152 div64_u64(size, 1024 * 1024 * 1024);
96303081 1153
1154 /*
1155 * we want to account for 1 more bitmap than what we have so we can make
1156 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1157 * we add more bitmaps.
1158 */
34d52cb6 1159 bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
96303081 1160
25891f79 1161 if (bitmap_bytes >= max_bytes) {
34d52cb6 1162 ctl->extents_thresh = 0;
1163 return;
1164 }
96303081 1165
1166 /*
 1167 * we want the extent entry threshold to always be at most 1/2 the max
1168 * bytes we can have, or whatever is less than that.
1169 */
1170 extent_bytes = max_bytes - bitmap_bytes;
1171 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
96303081 1172
34d52cb6 1173 ctl->extents_thresh =
25891f79 1174 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1175}
1176
1177static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1178 struct btrfs_free_space *info,
1179 u64 offset, u64 bytes)
96303081 1180{
f38b6e75 1181 unsigned long start, count;
96303081 1182
1183 start = offset_to_bit(info->offset, ctl->unit, offset);
1184 count = bytes_to_bits(bytes, ctl->unit);
f38b6e75 1185 BUG_ON(start + count > BITS_PER_BITMAP);
96303081 1186
f38b6e75 1187 bitmap_clear(info->bitmap, start, count);
1188
1189 info->bytes -= bytes;
1190}
1191
1192static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1193 struct btrfs_free_space *info, u64 offset,
1194 u64 bytes)
1195{
1196 __bitmap_clear_bits(ctl, info, offset, bytes);
34d52cb6 1197 ctl->free_space -= bytes;
1198}
1199
34d52cb6 1200static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1201 struct btrfs_free_space *info, u64 offset,
1202 u64 bytes)
96303081 1203{
f38b6e75 1204 unsigned long start, count;
96303081 1205
1206 start = offset_to_bit(info->offset, ctl->unit, offset);
1207 count = bytes_to_bits(bytes, ctl->unit);
f38b6e75 1208 BUG_ON(start + count > BITS_PER_BITMAP);
96303081 1209
f38b6e75 1210 bitmap_set(info->bitmap, start, count);
1211
1212 info->bytes += bytes;
34d52cb6 1213 ctl->free_space += bytes;
1214}
1215
34d52cb6 1216static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1217 struct btrfs_free_space *bitmap_info, u64 *offset,
1218 u64 *bytes)
1219{
1220 unsigned long found_bits = 0;
1221 unsigned long bits, i;
1222 unsigned long next_zero;
1223
34d52cb6 1224 i = offset_to_bit(bitmap_info->offset, ctl->unit,
96303081 1225 max_t(u64, *offset, bitmap_info->offset));
34d52cb6 1226 bits = bytes_to_bits(*bytes, ctl->unit);
1227
1228 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1229 i < BITS_PER_BITMAP;
1230 i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1231 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1232 BITS_PER_BITMAP, i);
1233 if ((next_zero - i) >= bits) {
1234 found_bits = next_zero - i;
1235 break;
1236 }
1237 i = next_zero;
1238 }
1239
1240 if (found_bits) {
1241 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1242 *bytes = (u64)(found_bits) * ctl->unit;
1243 return 0;
1244 }
1245
1246 return -1;
1247}
1248
1249static struct btrfs_free_space *
1250find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1251{
1252 struct btrfs_free_space *entry;
1253 struct rb_node *node;
1254 int ret;
1255
34d52cb6 1256 if (!ctl->free_space_offset.rb_node)
1257 return NULL;
1258
34d52cb6 1259 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1260 if (!entry)
1261 return NULL;
1262
1263 for (node = &entry->offset_index; node; node = rb_next(node)) {
1264 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1265 if (entry->bytes < *bytes)
1266 continue;
1267
1268 if (entry->bitmap) {
34d52cb6 1269 ret = search_bitmap(ctl, entry, offset, bytes);
1270 if (!ret)
1271 return entry;
1272 continue;
1273 }
1274
1275 *offset = entry->offset;
1276 *bytes = entry->bytes;
1277 return entry;
1278 }
1279
1280 return NULL;
1281}
1282
34d52cb6 1283static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1284 struct btrfs_free_space *info, u64 offset)
1285{
34d52cb6 1286 info->offset = offset_to_bitmap(ctl, offset);
f019f426 1287 info->bytes = 0;
1288 link_free_space(ctl, info);
1289 ctl->total_bitmaps++;
96303081 1290
34d52cb6 1291 ctl->op->recalc_thresholds(ctl);
1292}
1293
34d52cb6 1294static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1295 struct btrfs_free_space *bitmap_info)
1296{
34d52cb6 1297 unlink_free_space(ctl, bitmap_info);
edf6e2d1 1298 kfree(bitmap_info->bitmap);
dc89e982 1299 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1300 ctl->total_bitmaps--;
1301 ctl->op->recalc_thresholds(ctl);
1302}
1303
34d52cb6 1304static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1305 struct btrfs_free_space *bitmap_info,
1306 u64 *offset, u64 *bytes)
1307{
1308 u64 end;
1309 u64 search_start, search_bytes;
1310 int ret;
1311
1312again:
34d52cb6 1313 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
96303081 1314
1315 /*
1316 * XXX - this can go away after a few releases.
1317 *
1318 * since the only user of btrfs_remove_free_space is the tree logging
1319 * stuff, and the only way to test that is under crash conditions, we
 1320 * want to have this debug stuff here just in case something is not
 1321 * working. Search the bitmap for the space we are trying to use to
 1322 * make sure it is actually there. If it is not there then we need to stop
1323 * because something has gone wrong.
1324 */
1325 search_start = *offset;
1326 search_bytes = *bytes;
13dbc089 1327 search_bytes = min(search_bytes, end - search_start + 1);
34d52cb6 1328 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1329 BUG_ON(ret < 0 || search_start != *offset);
1330
96303081 1331 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
34d52cb6 1332 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1333 *bytes -= end - *offset + 1;
1334 *offset = end + 1;
1335 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
34d52cb6 1336 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1337 *bytes = 0;
1338 }
1339
1340 if (*bytes) {
6606bb97 1341 struct rb_node *next = rb_next(&bitmap_info->offset_index);
edf6e2d1 1342 if (!bitmap_info->bytes)
34d52cb6 1343 free_bitmap(ctl, bitmap_info);
96303081 1344
1345 /*
1346 * no entry after this bitmap, but we still have bytes to
1347 * remove, so something has gone wrong.
1348 */
1349 if (!next)
1350 return -EINVAL;
1351
1352 bitmap_info = rb_entry(next, struct btrfs_free_space,
1353 offset_index);
1354
1355 /*
1356 * if the next entry isn't a bitmap we need to return to let the
1357 * extent stuff do its work.
1358 */
1359 if (!bitmap_info->bitmap)
1360 return -EAGAIN;
1361
1362 /*
1363 * Ok the next item is a bitmap, but it may not actually hold
1364 * the information for the rest of this free space stuff, so
1365 * look for it, and if we don't find it return so we can try
1366 * everything over again.
1367 */
1368 search_start = *offset;
1369 search_bytes = *bytes;
34d52cb6 1370 ret = search_bitmap(ctl, bitmap_info, &search_start,
1371 &search_bytes);
1372 if (ret < 0 || search_start != *offset)
1373 return -EAGAIN;
1374
96303081 1375 goto again;
edf6e2d1 1376 } else if (!bitmap_info->bytes)
34d52cb6 1377 free_bitmap(ctl, bitmap_info);
1378
1379 return 0;
1380}
1381
1382static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1383 struct btrfs_free_space *info, u64 offset,
1384 u64 bytes)
1385{
1386 u64 bytes_to_set = 0;
1387 u64 end;
1388
1389 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1390
1391 bytes_to_set = min(end - offset, bytes);
1392
1393 bitmap_set_bits(ctl, info, offset, bytes_to_set);
1394
1395 return bytes_to_set;
1396
1397}
1398
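/*
 * Bitmap heuristic used below: while we are under extents_thresh, large
 * extents always stay as extent entries and small extents (up to 4
 * sectors) only go into a bitmap once half of the extent slots are in
 * use; block groups smaller than one bitmap's coverage never use
 * bitmaps at all.
 */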
1399static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1400 struct btrfs_free_space *info)
96303081 1401{
34d52cb6 1402 struct btrfs_block_group_cache *block_group = ctl->private;
1403
1404 /*
1405 * If we are below the extents threshold then we can add this as an
1406 * extent, and don't have to deal with the bitmap
1407 */
34d52cb6 1408 if (ctl->free_extents < ctl->extents_thresh) {
1409 /*
1410 * If this block group has some small extents we don't want to
 1411 * use up all of our free slots in the cache with them; we want
 1412 * to reserve them for larger extents.  However, if we have plenty
 1413 * of cache left then go ahead and add them, no sense in adding
1414 * the overhead of a bitmap if we don't have to.
1415 */
1416 if (info->bytes <= block_group->sectorsize * 4) {
1417 if (ctl->free_extents * 2 <= ctl->extents_thresh)
1418 return false;
32cb0840 1419 } else {
34d52cb6 1420 return false;
1421 }
1422 }
1423
1424 /*
1425 * some block groups are so tiny they can't be enveloped by a bitmap, so
1426 * don't even bother to create a bitmap for this
1427 */
1428 if (BITS_PER_BITMAP * block_group->sectorsize >
1429 block_group->key.offset)
1430 return false;
1431
1432 return true;
1433}
1434
1435static struct btrfs_free_space_op free_space_op = {
1436 .recalc_thresholds = recalculate_thresholds,
1437 .use_bitmap = use_bitmap,
1438};
1439
1440static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1441 struct btrfs_free_space *info)
1442{
1443 struct btrfs_free_space *bitmap_info;
2cdc342c 1444 struct btrfs_block_group_cache *block_group = NULL;
34d52cb6 1445 int added = 0;
2cdc342c 1446 u64 bytes, offset, bytes_added;
34d52cb6 1447 int ret;
1448
1449 bytes = info->bytes;
1450 offset = info->offset;
1451
1452 if (!ctl->op->use_bitmap(ctl, info))
1453 return 0;
1454
1455 if (ctl->op == &free_space_op)
1456 block_group = ctl->private;
38e87880 1457again:
1458 /*
1459 * Since we link bitmaps right into the cluster we need to see if we
1460 * have a cluster here, and if so and it has our bitmap we need to add
1461 * the free space to that bitmap.
1462 */
1463 if (block_group && !list_empty(&block_group->cluster_list)) {
1464 struct btrfs_free_cluster *cluster;
1465 struct rb_node *node;
1466 struct btrfs_free_space *entry;
1467
1468 cluster = list_entry(block_group->cluster_list.next,
1469 struct btrfs_free_cluster,
1470 block_group_list);
1471 spin_lock(&cluster->lock);
1472 node = rb_first(&cluster->root);
1473 if (!node) {
1474 spin_unlock(&cluster->lock);
38e87880 1475 goto no_cluster_bitmap;
1476 }
1477
1478 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1479 if (!entry->bitmap) {
1480 spin_unlock(&cluster->lock);
38e87880 1481 goto no_cluster_bitmap;
1482 }
1483
1484 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1485 bytes_added = add_bytes_to_bitmap(ctl, entry,
1486 offset, bytes);
1487 bytes -= bytes_added;
1488 offset += bytes_added;
1489 }
1490 spin_unlock(&cluster->lock);
1491 if (!bytes) {
1492 ret = 1;
1493 goto out;
1494 }
1495 }
1496
1497no_cluster_bitmap:
34d52cb6 1498 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1499 1, 0);
1500 if (!bitmap_info) {
1501 BUG_ON(added);
1502 goto new_bitmap;
1503 }
1504
1505 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1506 bytes -= bytes_added;
1507 offset += bytes_added;
1508 added = 0;
1509
1510 if (!bytes) {
1511 ret = 1;
1512 goto out;
1513 } else
1514 goto again;
1515
1516new_bitmap:
1517 if (info && info->bitmap) {
34d52cb6 1518 add_new_bitmap(ctl, info, offset);
1519 added = 1;
1520 info = NULL;
1521 goto again;
1522 } else {
34d52cb6 1523 spin_unlock(&ctl->tree_lock);
1524
1525 /* no pre-allocated info, allocate a new one */
1526 if (!info) {
1527 info = kmem_cache_zalloc(btrfs_free_space_cachep,
1528 GFP_NOFS);
96303081 1529 if (!info) {
34d52cb6 1530 spin_lock(&ctl->tree_lock);
1531 ret = -ENOMEM;
1532 goto out;
1533 }
1534 }
1535
1536 /* allocate the bitmap */
1537 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
34d52cb6 1538 spin_lock(&ctl->tree_lock);
1539 if (!info->bitmap) {
1540 ret = -ENOMEM;
1541 goto out;
1542 }
1543 goto again;
1544 }
1545
1546out:
1547 if (info) {
1548 if (info->bitmap)
1549 kfree(info->bitmap);
dc89e982 1550 kmem_cache_free(btrfs_free_space_cachep, info);
96303081 1551 }
1552
1553 return ret;
1554}
1555
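/*
 * try_merge_free_space() below coalesces a newly freed extent with
 * physically adjacent non-bitmap neighbours before it is linked, so
 * back-to-back frees collapse into a single rb-tree entry; update_stat
 * picks the unlink variant that also adjusts ctl->free_space.
 */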
945d8962 1556static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
f333adb5 1557 struct btrfs_free_space *info, bool update_stat)
0f9dd46c 1558{
1559 struct btrfs_free_space *left_info;
1560 struct btrfs_free_space *right_info;
1561 bool merged = false;
1562 u64 offset = info->offset;
1563 u64 bytes = info->bytes;
6226cb0a 1564
1565 /*
1566 * first we want to see if there is free space adjacent to the range we
1567 * are adding, if there is remove that struct and add a new one to
1568 * cover the entire range
1569 */
34d52cb6 1570 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1571 if (right_info && rb_prev(&right_info->offset_index))
1572 left_info = rb_entry(rb_prev(&right_info->offset_index),
1573 struct btrfs_free_space, offset_index);
1574 else
34d52cb6 1575 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
0f9dd46c 1576
96303081 1577 if (right_info && !right_info->bitmap) {
f333adb5 1578 if (update_stat)
34d52cb6 1579 unlink_free_space(ctl, right_info);
f333adb5 1580 else
34d52cb6 1581 __unlink_free_space(ctl, right_info);
6226cb0a 1582 info->bytes += right_info->bytes;
dc89e982 1583 kmem_cache_free(btrfs_free_space_cachep, right_info);
120d66ee 1584 merged = true;
1585 }
1586
1587 if (left_info && !left_info->bitmap &&
1588 left_info->offset + left_info->bytes == offset) {
f333adb5 1589 if (update_stat)
34d52cb6 1590 unlink_free_space(ctl, left_info);
f333adb5 1591 else
34d52cb6 1592 __unlink_free_space(ctl, left_info);
1593 info->offset = left_info->offset;
1594 info->bytes += left_info->bytes;
dc89e982 1595 kmem_cache_free(btrfs_free_space_cachep, left_info);
120d66ee 1596 merged = true;
1597 }
1598
1599 return merged;
1600}
1601
1602int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1603 u64 offset, u64 bytes)
1604{
1605 struct btrfs_free_space *info;
1606 int ret = 0;
1607
dc89e982 1608 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1609 if (!info)
1610 return -ENOMEM;
1611
1612 info->offset = offset;
1613 info->bytes = bytes;
1614
34d52cb6 1615 spin_lock(&ctl->tree_lock);
120d66ee 1616
34d52cb6 1617 if (try_merge_free_space(ctl, info, true))
1618 goto link;
1619
1620 /*
1621 * There was no extent directly to the left or right of this new
1622 * extent then we know we're going to have to allocate a new extent, so
1623 * before we do that see if we need to drop this into a bitmap
1624 */
34d52cb6 1625 ret = insert_into_bitmap(ctl, info);
1626 if (ret < 0) {
1627 goto out;
1628 } else if (ret) {
1629 ret = 0;
1630 goto out;
1631 }
1632link:
34d52cb6 1633 ret = link_free_space(ctl, info);
0f9dd46c 1634 if (ret)
dc89e982 1635 kmem_cache_free(btrfs_free_space_cachep, info);
96303081 1636out:
34d52cb6 1637 spin_unlock(&ctl->tree_lock);
6226cb0a 1638
0f9dd46c 1639 if (ret) {
96303081 1640 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
c293498b 1641 BUG_ON(ret == -EEXIST);
1642 }
1643
1644 return ret;
1645}
1646
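/*
 * Removal below distinguishes three extent cases: an exact match is
 * freed outright, a match at the start of an entry trims it in place,
 * and a removal from the middle unlinks the entry, keeps the tail and
 * re-adds the head; anything else is handed to remove_from_bitmap().
 */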
1647int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1648 u64 offset, u64 bytes)
0f9dd46c 1649{
34d52cb6 1650 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
0f9dd46c 1651 struct btrfs_free_space *info;
96303081 1652 struct btrfs_free_space *next_info = NULL;
1653 int ret = 0;
1654
34d52cb6 1655 spin_lock(&ctl->tree_lock);
6226cb0a 1656
96303081 1657again:
34d52cb6 1658 info = tree_search_offset(ctl, offset, 0, 0);
96303081 1659 if (!info) {
1660 /*
1661 * oops didn't find an extent that matched the space we wanted
1662 * to remove, look for a bitmap instead
1663 */
34d52cb6 1664 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1665 1, 0);
1666 if (!info) {
1667 WARN_ON(1);
1668 goto out_lock;
1669 }
1670 }
1671
1672 if (info->bytes < bytes && rb_next(&info->offset_index)) {
1673 u64 end;
1674 next_info = rb_entry(rb_next(&info->offset_index),
1675 struct btrfs_free_space,
1676 offset_index);
1677
1678 if (next_info->bitmap)
1679 end = next_info->offset +
1680 BITS_PER_BITMAP * ctl->unit - 1;
1681 else
1682 end = next_info->offset + next_info->bytes;
1683
1684 if (next_info->bytes < bytes ||
1685 next_info->offset > offset || offset > end) {
1686 printk(KERN_CRIT "Found free space at %llu, size %llu,"
1687 " trying to use %llu\n",
1688 (unsigned long long)info->offset,
1689 (unsigned long long)info->bytes,
1690 (unsigned long long)bytes);
1691 WARN_ON(1);
1692 ret = -EINVAL;
96303081 1693 goto out_lock;
0f9dd46c 1694 }
0f9dd46c 1695
1696 info = next_info;
1697 }
1698
1699 if (info->bytes == bytes) {
34d52cb6 1700 unlink_free_space(ctl, info);
1701 if (info->bitmap) {
1702 kfree(info->bitmap);
34d52cb6 1703 ctl->total_bitmaps--;
0f9dd46c 1704 }
dc89e982 1705 kmem_cache_free(btrfs_free_space_cachep, info);
1706 goto out_lock;
1707 }
0f9dd46c 1708
96303081 1709 if (!info->bitmap && info->offset == offset) {
34d52cb6 1710 unlink_free_space(ctl, info);
1711 info->offset += bytes;
1712 info->bytes -= bytes;
34d52cb6 1713 link_free_space(ctl, info);
1714 goto out_lock;
1715 }
0f9dd46c 1716
1717 if (!info->bitmap && info->offset <= offset &&
1718 info->offset + info->bytes >= offset + bytes) {
1719 u64 old_start = info->offset;
1720 /*
1721 * we're freeing space in the middle of the info,
1722 * this can happen during tree log replay
1723 *
1724 * first unlink the old info and then
1725 * insert it again after the hole we're creating
1726 */
34d52cb6 1727 unlink_free_space(ctl, info);
1728 if (offset + bytes < info->offset + info->bytes) {
1729 u64 old_end = info->offset + info->bytes;
1730
1731 info->offset = offset + bytes;
1732 info->bytes = old_end - info->offset;
34d52cb6 1733 ret = link_free_space(ctl, info);
1734 WARN_ON(ret);
1735 if (ret)
1736 goto out_lock;
1737 } else {
1738 /* the hole we're creating ends at the end
1739 * of the info struct, just free the info
1740 */
dc89e982 1741 kmem_cache_free(btrfs_free_space_cachep, info);
9b49c9b9 1742 }
34d52cb6 1743 spin_unlock(&ctl->tree_lock);
1744
1745 /* step two, insert a new info struct to cover
1746 * anything before the hole
9b49c9b9 1747 */
1748 ret = btrfs_add_free_space(block_group, old_start,
1749 offset - old_start);
1750 WARN_ON(ret);
1751 goto out;
0f9dd46c 1752 }
96303081 1753
34d52cb6 1754 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1755 if (ret == -EAGAIN)
1756 goto again;
1757 BUG_ON(ret);
1758out_lock:
34d52cb6 1759 spin_unlock(&ctl->tree_lock);
0f9dd46c 1760out:
1761 return ret;
1762}
1763
1764void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1765 u64 bytes)
1766{
34d52cb6 1767 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1768 struct btrfs_free_space *info;
1769 struct rb_node *n;
1770 int count = 0;
1771
34d52cb6 1772 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1773 info = rb_entry(n, struct btrfs_free_space, offset_index);
1774 if (info->bytes >= bytes)
1775 count++;
96303081 1776 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
21380931 1777 (unsigned long long)info->offset,
1778 (unsigned long long)info->bytes,
1779 (info->bitmap) ? "yes" : "no");
0f9dd46c 1780 }
1781 printk(KERN_INFO "block group has cluster?: %s\n",
1782 list_empty(&block_group->cluster_list) ? "no" : "yes");
 1783 printk(KERN_INFO "%d blocks of free space at or bigger than the given bytes"
1784 "\n", count);
1785}
1786
34d52cb6 1787void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
0f9dd46c 1788{
34d52cb6 1789 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
0f9dd46c 1790
1791 spin_lock_init(&ctl->tree_lock);
1792 ctl->unit = block_group->sectorsize;
1793 ctl->start = block_group->key.objectid;
1794 ctl->private = block_group;
1795 ctl->op = &free_space_op;
0f9dd46c 1796
1797 /*
1798 * we only want to have 32k of ram per block group for keeping
1799 * track of free space, and if we pass 1/2 of that we want to
1800 * start converting things over to using bitmaps
1801 */
1802 ctl->extents_thresh = ((1024 * 32) / 2) /
1803 sizeof(struct btrfs_free_space);
1804}
1805
1806/*
1807 * for a given cluster, put all of its extents back into the free
1808 * space cache. If the block group passed doesn't match the block group
1809 * pointed to by the cluster, someone else raced in and freed the
1810 * cluster already. In that case, we just return without changing anything
1811 */
1812static int
1813__btrfs_return_cluster_to_free_space(
1814 struct btrfs_block_group_cache *block_group,
1815 struct btrfs_free_cluster *cluster)
1816{
34d52cb6 1817 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1818 struct btrfs_free_space *entry;
1819 struct rb_node *node;
1820
1821 spin_lock(&cluster->lock);
1822 if (cluster->block_group != block_group)
1823 goto out;
1824
96303081 1825 cluster->block_group = NULL;
fa9c0d79 1826 cluster->window_start = 0;
96303081 1827 list_del_init(&cluster->block_group_list);
96303081 1828
fa9c0d79 1829 node = rb_first(&cluster->root);
96303081 1830 while (node) {
1831 bool bitmap;
1832
1833 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1834 node = rb_next(&entry->offset_index);
1835 rb_erase(&entry->offset_index, &cluster->root);
4e69b598
JB
1836
1837 bitmap = (entry->bitmap != NULL);
1838 if (!bitmap)
34d52cb6
LZ
1839 try_merge_free_space(ctl, entry, false);
1840 tree_insert_offset(&ctl->free_space_offset,
4e69b598 1841 entry->offset, &entry->offset_index, bitmap);
fa9c0d79 1842 }
6bef4d31 1843 cluster->root = RB_ROOT;
96303081 1844
fa9c0d79
CM
1845out:
1846 spin_unlock(&cluster->lock);
96303081 1847 btrfs_put_block_group(block_group);
fa9c0d79
CM
1848 return 0;
1849}
1850
09655373 1851void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
0f9dd46c
JB
1852{
1853 struct btrfs_free_space *info;
1854 struct rb_node *node;
581bb050 1855
581bb050
LZ
1856 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
1857 info = rb_entry(node, struct btrfs_free_space, offset_index);
9b90f513
JB
1858 if (!info->bitmap) {
1859 unlink_free_space(ctl, info);
1860 kmem_cache_free(btrfs_free_space_cachep, info);
1861 } else {
1862 free_bitmap(ctl, info);
1863 }
581bb050
LZ
1864 if (need_resched()) {
1865 spin_unlock(&ctl->tree_lock);
1866 cond_resched();
1867 spin_lock(&ctl->tree_lock);
1868 }
1869 }
09655373
CM
1870}
1871
1872void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
1873{
1874 spin_lock(&ctl->tree_lock);
1875 __btrfs_remove_free_space_cache_locked(ctl);
581bb050
LZ
1876 spin_unlock(&ctl->tree_lock);
1877}
1878
1879void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1880{
1881 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
fa9c0d79 1882 struct btrfs_free_cluster *cluster;
96303081 1883 struct list_head *head;
0f9dd46c 1884
34d52cb6 1885 spin_lock(&ctl->tree_lock);
96303081
JB
1886 while ((head = block_group->cluster_list.next) !=
1887 &block_group->cluster_list) {
1888 cluster = list_entry(head, struct btrfs_free_cluster,
1889 block_group_list);
fa9c0d79
CM
1890
1891 WARN_ON(cluster->block_group != block_group);
1892 __btrfs_return_cluster_to_free_space(block_group, cluster);
96303081 1893 if (need_resched()) {
34d52cb6 1894 spin_unlock(&ctl->tree_lock);
96303081 1895 cond_resched();
34d52cb6 1896 spin_lock(&ctl->tree_lock);
96303081 1897 }
fa9c0d79 1898 }
09655373 1899 __btrfs_remove_free_space_cache_locked(ctl);
34d52cb6 1900 spin_unlock(&ctl->tree_lock);
fa9c0d79 1901
0f9dd46c
JB
1902}
1903
6226cb0a
JB
1904u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1905 u64 offset, u64 bytes, u64 empty_size)
0f9dd46c 1906{
34d52cb6 1907 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
6226cb0a 1908 struct btrfs_free_space *entry = NULL;
96303081 1909 u64 bytes_search = bytes + empty_size;
6226cb0a 1910 u64 ret = 0;
0f9dd46c 1911
34d52cb6
LZ
1912 spin_lock(&ctl->tree_lock);
1913 entry = find_free_space(ctl, &offset, &bytes_search);
6226cb0a 1914 if (!entry)
96303081
JB
1915 goto out;
1916
1917 ret = offset;
1918 if (entry->bitmap) {
34d52cb6 1919 bitmap_clear_bits(ctl, entry, offset, bytes);
edf6e2d1 1920 if (!entry->bytes)
34d52cb6 1921 free_bitmap(ctl, entry);
96303081 1922 } else {
34d52cb6 1923 unlink_free_space(ctl, entry);
6226cb0a
JB
1924 entry->offset += bytes;
1925 entry->bytes -= bytes;
6226cb0a 1926 if (!entry->bytes)
dc89e982 1927 kmem_cache_free(btrfs_free_space_cachep, entry);
6226cb0a 1928 else
34d52cb6 1929 link_free_space(ctl, entry);
6226cb0a 1930 }
0f9dd46c 1931
96303081 1932out:
34d52cb6 1933 spin_unlock(&ctl->tree_lock);
817d52f8 1934
0f9dd46c
JB
1935 return ret;
1936}
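/*
 * Editorial sketch (not part of the original source): a hypothetical caller
 * of btrfs_find_space_for_alloc().  'block_group', 'search_start',
 * 'num_bytes' and 'empty_size' are assumed to come from the allocator's
 * surrounding context:
 *
 *	u64 offset;
 *
 *	offset = btrfs_find_space_for_alloc(block_group, search_start,
 *					    num_bytes, empty_size);
 *	if (!offset) {
 *		// nothing large enough here, move on to another block group
 *	} else {
 *		// 'offset' is the logical start of the range just carved out
 *	}
 */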
fa9c0d79
CM
1937
1938/*
1939 * given a cluster, put all of its extents back into the free space
1940 * cache. If a block group is passed, this function will only free
1941 * a cluster that belongs to the passed block group.
1942 *
1943 * Otherwise, it'll get a reference on the block group pointed to by the
1944 * cluster and remove the cluster from it.
1945 */
1946int btrfs_return_cluster_to_free_space(
1947 struct btrfs_block_group_cache *block_group,
1948 struct btrfs_free_cluster *cluster)
1949{
34d52cb6 1950 struct btrfs_free_space_ctl *ctl;
fa9c0d79
CM
1951 int ret;
1952
1953 /* first, get a safe pointer to the block group */
1954 spin_lock(&cluster->lock);
1955 if (!block_group) {
1956 block_group = cluster->block_group;
1957 if (!block_group) {
1958 spin_unlock(&cluster->lock);
1959 return 0;
1960 }
1961 } else if (cluster->block_group != block_group) {
 1962		/* someone else has already freed it, don't redo their work */
1963 spin_unlock(&cluster->lock);
1964 return 0;
1965 }
1966 atomic_inc(&block_group->count);
1967 spin_unlock(&cluster->lock);
1968
34d52cb6
LZ
1969 ctl = block_group->free_space_ctl;
1970
fa9c0d79 1971 /* now return any extents the cluster had on it */
34d52cb6 1972 spin_lock(&ctl->tree_lock);
fa9c0d79 1973 ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
34d52cb6 1974 spin_unlock(&ctl->tree_lock);
fa9c0d79
CM
1975
1976 /* finally drop our ref */
1977 btrfs_put_block_group(block_group);
1978 return ret;
1979}
1980
96303081
JB
1981static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1982 struct btrfs_free_cluster *cluster,
4e69b598 1983 struct btrfs_free_space *entry,
96303081
JB
1984 u64 bytes, u64 min_start)
1985{
34d52cb6 1986 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
96303081
JB
1987 int err;
1988 u64 search_start = cluster->window_start;
1989 u64 search_bytes = bytes;
1990 u64 ret = 0;
1991
96303081
JB
1992 search_start = min_start;
1993 search_bytes = bytes;
1994
34d52cb6 1995 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
96303081 1996 if (err)
4e69b598 1997 return 0;
96303081
JB
1998
1999 ret = search_start;
bb3ac5a4 2000 __bitmap_clear_bits(ctl, entry, ret, bytes);
96303081
JB
2001
2002 return ret;
2003}
2004
fa9c0d79
CM
2005/*
2006 * given a cluster, try to allocate 'bytes' from it, returns 0
2007 * if it couldn't find anything suitably large, or a logical disk offset
2008 * if things worked out
2009 */
2010u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2011 struct btrfs_free_cluster *cluster, u64 bytes,
2012 u64 min_start)
2013{
34d52cb6 2014 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
fa9c0d79
CM
2015 struct btrfs_free_space *entry = NULL;
2016 struct rb_node *node;
2017 u64 ret = 0;
2018
2019 spin_lock(&cluster->lock);
2020 if (bytes > cluster->max_size)
2021 goto out;
2022
2023 if (cluster->block_group != block_group)
2024 goto out;
2025
2026 node = rb_first(&cluster->root);
2027 if (!node)
2028 goto out;
2029
2030 entry = rb_entry(node, struct btrfs_free_space, offset_index);
fa9c0d79 2031	while (1) {
4e69b598
JB
2032 if (entry->bytes < bytes ||
2033 (!entry->bitmap && entry->offset < min_start)) {
fa9c0d79
CM
2034 node = rb_next(&entry->offset_index);
2035 if (!node)
2036 break;
2037 entry = rb_entry(node, struct btrfs_free_space,
2038 offset_index);
2039 continue;
2040 }
fa9c0d79 2041
4e69b598
JB
2042 if (entry->bitmap) {
2043 ret = btrfs_alloc_from_bitmap(block_group,
2044 cluster, entry, bytes,
2045 min_start);
2046 if (ret == 0) {
4e69b598
JB
2047 node = rb_next(&entry->offset_index);
2048 if (!node)
2049 break;
2050 entry = rb_entry(node, struct btrfs_free_space,
2051 offset_index);
2052 continue;
2053 }
2054 } else {
4e69b598
JB
2055 ret = entry->offset;
2056
2057 entry->offset += bytes;
2058 entry->bytes -= bytes;
2059 }
fa9c0d79 2060
5e71b5d5 2061 if (entry->bytes == 0)
fa9c0d79 2062 rb_erase(&entry->offset_index, &cluster->root);
fa9c0d79
CM
2063 break;
2064 }
2065out:
2066 spin_unlock(&cluster->lock);
96303081 2067
5e71b5d5
LZ
2068 if (!ret)
2069 return 0;
2070
34d52cb6 2071 spin_lock(&ctl->tree_lock);
5e71b5d5 2072
34d52cb6 2073 ctl->free_space -= bytes;
5e71b5d5 2074 if (entry->bytes == 0) {
34d52cb6 2075 ctl->free_extents--;
4e69b598
JB
2076 if (entry->bitmap) {
2077 kfree(entry->bitmap);
34d52cb6
LZ
2078 ctl->total_bitmaps--;
2079 ctl->op->recalc_thresholds(ctl);
4e69b598 2080 }
dc89e982 2081 kmem_cache_free(btrfs_free_space_cachep, entry);
5e71b5d5
LZ
2082 }
2083
34d52cb6 2084 spin_unlock(&ctl->tree_lock);
5e71b5d5 2085
fa9c0d79
CM
2086 return ret;
2087}
2088
96303081
JB
2089static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2090 struct btrfs_free_space *entry,
2091 struct btrfs_free_cluster *cluster,
2092 u64 offset, u64 bytes, u64 min_bytes)
2093{
34d52cb6 2094 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
96303081
JB
2095 unsigned long next_zero;
2096 unsigned long i;
2097 unsigned long search_bits;
2098 unsigned long total_bits;
2099 unsigned long found_bits;
2100 unsigned long start = 0;
2101 unsigned long total_found = 0;
4e69b598 2102 int ret;
96303081
JB
2103 bool found = false;
2104
2105 i = offset_to_bit(entry->offset, block_group->sectorsize,
2106 max_t(u64, offset, entry->offset));
d0a365e8
JB
2107 search_bits = bytes_to_bits(bytes, block_group->sectorsize);
2108 total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
96303081
JB
2109
2110again:
2111 found_bits = 0;
2112 for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2113 i < BITS_PER_BITMAP;
2114 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2115 next_zero = find_next_zero_bit(entry->bitmap,
2116 BITS_PER_BITMAP, i);
2117 if (next_zero - i >= search_bits) {
2118 found_bits = next_zero - i;
2119 break;
2120 }
2121 i = next_zero;
2122 }
2123
2124 if (!found_bits)
4e69b598 2125 return -ENOSPC;
96303081
JB
2126
2127 if (!found) {
2128 start = i;
2129 found = true;
2130 }
2131
2132 total_found += found_bits;
2133
2134 if (cluster->max_size < found_bits * block_group->sectorsize)
2135 cluster->max_size = found_bits * block_group->sectorsize;
2136
2137 if (total_found < total_bits) {
2138 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
2139 if (i - start > total_bits * 2) {
2140 total_found = 0;
2141 cluster->max_size = 0;
2142 found = false;
2143 }
2144 goto again;
2145 }
2146
2147 cluster->window_start = start * block_group->sectorsize +
2148 entry->offset;
34d52cb6 2149 rb_erase(&entry->offset_index, &ctl->free_space_offset);
4e69b598
JB
2150 ret = tree_insert_offset(&cluster->root, entry->offset,
2151 &entry->offset_index, 1);
2152 BUG_ON(ret);
96303081
JB
2153
2154 return 0;
2155}
2156
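/*
 * Editorial note (not part of the original source): with the common case of
 * 4KiB pages and a 4KiB sectorsize, BITS_PER_BITMAP is 4096 * 8 = 32768, so
 * one bitmap entry covers 32768 * 4096 bytes = 128MiB of the block group,
 * and bytes_to_bits(min_bytes, sectorsize) above is roughly
 * min_bytes / 4096.
 */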
4e69b598
JB
2157/*
2158 * This searches the block group for just extents to fill the cluster with.
2159 */
3de85bb9
JB
2160static noinline int
2161setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2162 struct btrfs_free_cluster *cluster,
2163 struct list_head *bitmaps, u64 offset, u64 bytes,
2164 u64 min_bytes)
4e69b598 2165{
34d52cb6 2166 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
4e69b598
JB
2167 struct btrfs_free_space *first = NULL;
2168 struct btrfs_free_space *entry = NULL;
2169 struct btrfs_free_space *prev = NULL;
2170 struct btrfs_free_space *last;
2171 struct rb_node *node;
2172 u64 window_start;
2173 u64 window_free;
2174 u64 max_extent;
2175 u64 max_gap = 128 * 1024;
2176
34d52cb6 2177 entry = tree_search_offset(ctl, offset, 0, 1);
4e69b598
JB
2178 if (!entry)
2179 return -ENOSPC;
2180
2181 /*
2182 * We don't want bitmaps, so just move along until we find a normal
2183 * extent entry.
2184 */
2185 while (entry->bitmap) {
86d4a77b
JB
2186 if (list_empty(&entry->list))
2187 list_add_tail(&entry->list, bitmaps);
4e69b598
JB
2188 node = rb_next(&entry->offset_index);
2189 if (!node)
2190 return -ENOSPC;
2191 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2192 }
2193
2194 window_start = entry->offset;
2195 window_free = entry->bytes;
2196 max_extent = entry->bytes;
2197 first = entry;
2198 last = entry;
2199 prev = entry;
2200
2201 while (window_free <= min_bytes) {
2202 node = rb_next(&entry->offset_index);
2203 if (!node)
2204 return -ENOSPC;
2205 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2206
86d4a77b
JB
2207 if (entry->bitmap) {
2208 if (list_empty(&entry->list))
2209 list_add_tail(&entry->list, bitmaps);
4e69b598 2210 continue;
86d4a77b
JB
2211 }
2212
4e69b598
JB
2213 /*
2214 * we haven't filled the empty size and the window is
2215 * very large. reset and try again
2216 */
2217 if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
2218 entry->offset - window_start > (min_bytes * 2)) {
2219 first = entry;
2220 window_start = entry->offset;
2221 window_free = entry->bytes;
2222 last = entry;
2223 max_extent = entry->bytes;
2224 } else {
2225 last = entry;
2226 window_free += entry->bytes;
2227 if (entry->bytes > max_extent)
2228 max_extent = entry->bytes;
2229 }
2230 prev = entry;
2231 }
2232
2233 cluster->window_start = first->offset;
2234
2235 node = &first->offset_index;
2236
2237 /*
2238 * now we've found our entries, pull them out of the free space
2239 * cache and put them into the cluster rbtree
2240 */
2241 do {
2242 int ret;
2243
2244 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2245 node = rb_next(&entry->offset_index);
2246 if (entry->bitmap)
2247 continue;
2248
34d52cb6 2249 rb_erase(&entry->offset_index, &ctl->free_space_offset);
4e69b598
JB
2250 ret = tree_insert_offset(&cluster->root, entry->offset,
2251 &entry->offset_index, 0);
2252 BUG_ON(ret);
2253 } while (node && entry != last);
2254
2255 cluster->max_size = max_extent;
2256
2257 return 0;
2258}
2259
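/*
 * Editorial example (not part of the original source), with made-up numbers
 * for the window logic above: suppose min_bytes is 256KiB and the tree holds
 * extent entries of 64KiB, 32KiB and 96KiB packed close together, then a
 * 1MiB gap, then a 512KiB extent.  The first three entries only add up to
 * 192KiB, and the 1MiB gap exceeds max_gap (128KiB), so the window is reset
 * and restarted at the 512KiB extent, which satisfies min_bytes on its own.
 */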
2260/*
 2261 * This specifically looks for bitmaps that may work in the cluster; we assume
 2262 * that we have already failed to find extents that will work.
2263 */
3de85bb9
JB
2264static noinline int
2265setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2266 struct btrfs_free_cluster *cluster,
2267 struct list_head *bitmaps, u64 offset, u64 bytes,
2268 u64 min_bytes)
4e69b598 2269{
34d52cb6 2270 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
4e69b598
JB
2271 struct btrfs_free_space *entry;
2272 struct rb_node *node;
2273 int ret = -ENOSPC;
2274
34d52cb6 2275 if (ctl->total_bitmaps == 0)
4e69b598
JB
2276 return -ENOSPC;
2277
86d4a77b
JB
2278 /*
2279 * First check our cached list of bitmaps and see if there is an entry
2280 * here that will work.
2281 */
2282 list_for_each_entry(entry, bitmaps, list) {
2283 if (entry->bytes < min_bytes)
2284 continue;
2285 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2286 bytes, min_bytes);
2287 if (!ret)
2288 return 0;
2289 }
2290
2291 /*
 2292	 * If we do have entries on our list and we are here, then we didn't find
 2293	 * anything suitable, so go ahead and get the next entry after the last
 2294	 * entry in this list and start the search from there.
2295 */
2296 if (!list_empty(bitmaps)) {
2297 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2298 list);
2299 node = rb_next(&entry->offset_index);
2300 if (!node)
2301 return -ENOSPC;
2302 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2303 goto search;
2304 }
2305
34d52cb6 2306 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
4e69b598
JB
2307 if (!entry)
2308 return -ENOSPC;
2309
86d4a77b 2310search:
4e69b598
JB
2311 node = &entry->offset_index;
2312 do {
2313 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2314 node = rb_next(&entry->offset_index);
2315 if (!entry->bitmap)
2316 continue;
2317 if (entry->bytes < min_bytes)
2318 continue;
2319 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2320 bytes, min_bytes);
2321 } while (ret && node);
2322
2323 return ret;
2324}
2325
fa9c0d79
CM
2326/*
 2327 * here we try to find a cluster of blocks in a block group.  The goal
 2328 * is to find at least 'bytes' free, and up to 'empty_size + bytes' free.
 2329 * We might not find them all in one contiguous area.
 2330 *
 2331 * returns zero and sets up the cluster if things worked out, otherwise
 2332 * it returns -ENOSPC
2333 */
2334int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
451d7585 2335 struct btrfs_root *root,
fa9c0d79
CM
2336 struct btrfs_block_group_cache *block_group,
2337 struct btrfs_free_cluster *cluster,
2338 u64 offset, u64 bytes, u64 empty_size)
2339{
34d52cb6 2340 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
86d4a77b
JB
2341 struct list_head bitmaps;
2342 struct btrfs_free_space *entry, *tmp;
fa9c0d79 2343 u64 min_bytes;
fa9c0d79
CM
2344 int ret;
2345
 2346	/* for metadata, allow allocations with more holes */
451d7585
CM
2347 if (btrfs_test_opt(root, SSD_SPREAD)) {
2348 min_bytes = bytes + empty_size;
2349 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
fa9c0d79
CM
2350 /*
 2351		 * we want to do larger allocations when we are
 2352		 * flushing out the delayed refs; it helps us avoid
 2353		 * creating more work as we go along.
2354 */
2355 if (trans->transaction->delayed_refs.flushing)
2356 min_bytes = max(bytes, (bytes + empty_size) >> 1);
2357 else
2358 min_bytes = max(bytes, (bytes + empty_size) >> 4);
2359 } else
2360 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2361
34d52cb6 2362 spin_lock(&ctl->tree_lock);
7d0d2e8e
JB
2363
2364 /*
2365 * If we know we don't have enough space to make a cluster don't even
2366 * bother doing all the work to try and find one.
2367 */
34d52cb6
LZ
2368 if (ctl->free_space < min_bytes) {
2369 spin_unlock(&ctl->tree_lock);
7d0d2e8e
JB
2370 return -ENOSPC;
2371 }
2372
fa9c0d79
CM
2373 spin_lock(&cluster->lock);
2374
2375 /* someone already found a cluster, hooray */
2376 if (cluster->block_group) {
2377 ret = 0;
2378 goto out;
2379 }
fa9c0d79 2380
86d4a77b
JB
2381 INIT_LIST_HEAD(&bitmaps);
2382 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2383 bytes, min_bytes);
4e69b598 2384 if (ret)
86d4a77b
JB
2385 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2386 offset, bytes, min_bytes);
2387
2388 /* Clear our temporary list */
2389 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2390 list_del_init(&entry->list);
fa9c0d79 2391
4e69b598
JB
2392 if (!ret) {
2393 atomic_inc(&block_group->count);
2394 list_add_tail(&cluster->block_group_list,
2395 &block_group->cluster_list);
2396 cluster->block_group = block_group;
fa9c0d79 2397 }
fa9c0d79
CM
2398out:
2399 spin_unlock(&cluster->lock);
34d52cb6 2400 spin_unlock(&ctl->tree_lock);
fa9c0d79
CM
2401
2402 return ret;
2403}
2404
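/*
 * Editorial sketch (not part of the original source): the rough life cycle
 * of a cluster as an allocator might drive it.  The real callers in
 * extent-tree.c also hold cluster->refill_lock around the refill; that and
 * the retry logic are omitted here:
 *
 *	struct btrfs_free_cluster cluster;
 *
 *	btrfs_init_free_cluster(&cluster);
 *	if (!btrfs_find_space_cluster(trans, root, block_group, &cluster,
 *				      search_start, num_bytes, empty_size)) {
 *		u64 offset = btrfs_alloc_from_cluster(block_group, &cluster,
 *						      num_bytes, search_start);
 *		// offset == 0 means nothing suitable could be carved out
 *	}
 *	btrfs_return_cluster_to_free_space(block_group, &cluster);
 */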
2405/*
2406 * simple code to zero out a cluster
2407 */
2408void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2409{
2410 spin_lock_init(&cluster->lock);
2411 spin_lock_init(&cluster->refill_lock);
6bef4d31 2412 cluster->root = RB_ROOT;
fa9c0d79
CM
2413 cluster->max_size = 0;
2414 INIT_LIST_HEAD(&cluster->block_group_list);
2415 cluster->block_group = NULL;
2416}
2417
f7039b1d
LD
2418int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2419 u64 *trimmed, u64 start, u64 end, u64 minlen)
2420{
34d52cb6 2421 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
f7039b1d
LD
2422 struct btrfs_free_space *entry = NULL;
2423 struct btrfs_fs_info *fs_info = block_group->fs_info;
2424 u64 bytes = 0;
2425 u64 actually_trimmed;
2426 int ret = 0;
2427
2428 *trimmed = 0;
2429
2430 while (start < end) {
34d52cb6 2431 spin_lock(&ctl->tree_lock);
f7039b1d 2432
34d52cb6
LZ
2433 if (ctl->free_space < minlen) {
2434 spin_unlock(&ctl->tree_lock);
f7039b1d
LD
2435 break;
2436 }
2437
34d52cb6 2438 entry = tree_search_offset(ctl, start, 0, 1);
f7039b1d 2439 if (!entry)
34d52cb6
LZ
2440 entry = tree_search_offset(ctl,
2441 offset_to_bitmap(ctl, start),
f7039b1d
LD
2442 1, 1);
2443
2444 if (!entry || entry->offset >= end) {
34d52cb6 2445 spin_unlock(&ctl->tree_lock);
f7039b1d
LD
2446 break;
2447 }
2448
2449 if (entry->bitmap) {
34d52cb6 2450 ret = search_bitmap(ctl, entry, &start, &bytes);
f7039b1d
LD
2451 if (!ret) {
2452 if (start >= end) {
34d52cb6 2453 spin_unlock(&ctl->tree_lock);
f7039b1d
LD
2454 break;
2455 }
2456 bytes = min(bytes, end - start);
34d52cb6 2457 bitmap_clear_bits(ctl, entry, start, bytes);
f7039b1d 2458 if (entry->bytes == 0)
34d52cb6 2459 free_bitmap(ctl, entry);
f7039b1d
LD
2460 } else {
2461 start = entry->offset + BITS_PER_BITMAP *
2462 block_group->sectorsize;
34d52cb6 2463 spin_unlock(&ctl->tree_lock);
f7039b1d
LD
2464 ret = 0;
2465 continue;
2466 }
2467 } else {
2468 start = entry->offset;
2469 bytes = min(entry->bytes, end - start);
34d52cb6 2470 unlink_free_space(ctl, entry);
f789b684 2471 kmem_cache_free(btrfs_free_space_cachep, entry);
f7039b1d
LD
2472 }
2473
34d52cb6 2474 spin_unlock(&ctl->tree_lock);
f7039b1d
LD
2475
2476 if (bytes >= minlen) {
fb25e914
JB
2477 struct btrfs_space_info *space_info;
2478 int update = 0;
2479
2480 space_info = block_group->space_info;
2481 spin_lock(&space_info->lock);
2482 spin_lock(&block_group->lock);
2483 if (!block_group->ro) {
2484 block_group->reserved += bytes;
2485 space_info->bytes_reserved += bytes;
2486 update = 1;
2487 }
2488 spin_unlock(&block_group->lock);
2489 spin_unlock(&space_info->lock);
f7039b1d
LD
2490
2491 ret = btrfs_error_discard_extent(fs_info->extent_root,
2492 start,
2493 bytes,
2494 &actually_trimmed);
2495
34d52cb6 2496 btrfs_add_free_space(block_group, start, bytes);
fb25e914
JB
2497 if (update) {
2498 spin_lock(&space_info->lock);
2499 spin_lock(&block_group->lock);
2500 if (block_group->ro)
2501 space_info->bytes_readonly += bytes;
2502 block_group->reserved -= bytes;
2503 space_info->bytes_reserved -= bytes;
2504 spin_unlock(&space_info->lock);
2505 spin_unlock(&block_group->lock);
2506 }
f7039b1d
LD
2507
2508 if (ret)
2509 break;
2510 *trimmed += actually_trimmed;
2511 }
2512 start += bytes;
2513 bytes = 0;
2514
2515 if (fatal_signal_pending(current)) {
2516 ret = -ERESTARTSYS;
2517 break;
2518 }
2519
2520 cond_resched();
2521 }
2522
2523 return ret;
2524}
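/*
 * Editorial note (not part of the original source): this is the per block
 * group worker behind FITRIM (fstrim).  A hypothetical caller walks the
 * block groups overlapping the requested range and accumulates the per
 * group results; 'cache', 'range' and 'trimmed' are assumed context:
 *
 *	u64 group_trimmed;
 *
 *	ret = btrfs_trim_block_group(cache, &group_trimmed, start, end,
 *				     range->minlen);
 *	trimmed += group_trimmed;
 */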
581bb050
LZ
2525
2526/*
2527 * Find the left-most item in the cache tree, and then return the
2528 * smallest inode number in the item.
2529 *
2530 * Note: the returned inode number may not be the smallest one in
2531 * the tree, if the left-most item is a bitmap.
2532 */
2533u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2534{
2535 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2536 struct btrfs_free_space *entry = NULL;
2537 u64 ino = 0;
2538
2539 spin_lock(&ctl->tree_lock);
2540
2541 if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2542 goto out;
2543
2544 entry = rb_entry(rb_first(&ctl->free_space_offset),
2545 struct btrfs_free_space, offset_index);
2546
2547 if (!entry->bitmap) {
2548 ino = entry->offset;
2549
2550 unlink_free_space(ctl, entry);
2551 entry->offset++;
2552 entry->bytes--;
2553 if (!entry->bytes)
2554 kmem_cache_free(btrfs_free_space_cachep, entry);
2555 else
2556 link_free_space(ctl, entry);
2557 } else {
2558 u64 offset = 0;
2559 u64 count = 1;
2560 int ret;
2561
2562 ret = search_bitmap(ctl, entry, &offset, &count);
2563 BUG_ON(ret);
2564
2565 ino = offset;
2566 bitmap_clear_bits(ctl, entry, offset, 1);
2567 if (entry->bytes == 0)
2568 free_bitmap(ctl, entry);
2569 }
2570out:
2571 spin_unlock(&ctl->tree_lock);
2572
2573 return ino;
2574}
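/*
 * Editorial example (not part of the original source): if the left-most
 * entry is a plain extent covering inode numbers [256, 512), this returns
 * 256 and re-links the entry as [257, 512).  If the left-most entry is a
 * bitmap, a single bit is found and cleared, and the number handed back may
 * be larger than an ino still free in a later extent entry, which is why
 * the note above says the result is not necessarily the smallest free ino.
 */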
82d5902d
LZ
2575
2576struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2577 struct btrfs_path *path)
2578{
2579 struct inode *inode = NULL;
2580
2581 spin_lock(&root->cache_lock);
2582 if (root->cache_inode)
2583 inode = igrab(root->cache_inode);
2584 spin_unlock(&root->cache_lock);
2585 if (inode)
2586 return inode;
2587
2588 inode = __lookup_free_space_inode(root, path, 0);
2589 if (IS_ERR(inode))
2590 return inode;
2591
2592 spin_lock(&root->cache_lock);
7841cb28 2593 if (!btrfs_fs_closing(root->fs_info))
82d5902d
LZ
2594 root->cache_inode = igrab(inode);
2595 spin_unlock(&root->cache_lock);
2596
2597 return inode;
2598}
2599
2600int create_free_ino_inode(struct btrfs_root *root,
2601 struct btrfs_trans_handle *trans,
2602 struct btrfs_path *path)
2603{
2604 return __create_free_space_inode(root, trans, path,
2605 BTRFS_FREE_INO_OBJECTID, 0);
2606}
2607
2608int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2609{
2610 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2611 struct btrfs_path *path;
2612 struct inode *inode;
2613 int ret = 0;
2614 u64 root_gen = btrfs_root_generation(&root->root_item);
2615
4b9465cb
CM
2616 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2617 return 0;
2618
82d5902d
LZ
2619 /*
 2620	 * If we're unmounting, just return, since this does a search on the
 2621	 * normal root rather than the commit root and we could deadlock.
2622 */
7841cb28 2623 if (btrfs_fs_closing(fs_info))
82d5902d
LZ
2624 return 0;
2625
2626 path = btrfs_alloc_path();
2627 if (!path)
2628 return 0;
2629
2630 inode = lookup_free_ino_inode(root, path);
2631 if (IS_ERR(inode))
2632 goto out;
2633
2634 if (root_gen != BTRFS_I(inode)->generation)
2635 goto out_put;
2636
2637 ret = __load_free_space_cache(root, inode, ctl, path, 0);
2638
2639 if (ret < 0)
2640 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2641 "root %llu\n", root->root_key.objectid);
2642out_put:
2643 iput(inode);
2644out:
2645 btrfs_free_path(path);
2646 return ret;
2647}
2648
2649int btrfs_write_out_ino_cache(struct btrfs_root *root,
2650 struct btrfs_trans_handle *trans,
2651 struct btrfs_path *path)
2652{
2653 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2654 struct inode *inode;
2655 int ret;
2656
4b9465cb
CM
2657 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2658 return 0;
2659
82d5902d
LZ
2660 inode = lookup_free_ino_inode(root, path);
2661 if (IS_ERR(inode))
2662 return 0;
2663
2664 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2665 if (ret < 0)
2666 printk(KERN_ERR "btrfs: failed to write free ino cache "
2667 "for root %llu\n", root->root_key.objectid);
2668
2669 iput(inode);
2670 return ret;
2671}