fs/btrfs/extent_map.c
// SPDX-License-Identifier: GPL-2.0

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include "messages.h"
#include "ctree.h"
#include "volumes.h"
#include "extent_map.h"
#include "compression.h"
#include "btrfs_inode.h"

static struct kmem_cache *extent_map_cache;

int __init extent_map_init(void)
{
	extent_map_cache = kmem_cache_create("btrfs_extent_map",
			sizeof(struct extent_map), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!extent_map_cache)
		return -ENOMEM;
	return 0;
}

void __cold extent_map_exit(void)
{
	kmem_cache_destroy(extent_map_cache);
}

/**
 * extent_map_tree_init - initialize extent map tree
 * @tree: tree to initialize
 *
 * Initialize the extent tree @tree. Should be called for each new inode
 * or other user of the extent_map interface.
 */
void extent_map_tree_init(struct extent_map_tree *tree)
{
	tree->map = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&tree->modified_extents);
	rwlock_init(&tree->lock);
}

/**
 * alloc_extent_map - allocate new extent map structure
 *
 * Allocate a new extent_map structure. The new structure is
 * returned with a reference count of one and needs to be
 * freed using free_extent_map().
 */
struct extent_map *alloc_extent_map(void)
{
	struct extent_map *em;

	em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
	if (!em)
		return NULL;
	RB_CLEAR_NODE(&em->rb_node);
	em->compress_type = BTRFS_COMPRESS_NONE;
	refcount_set(&em->refs, 1);
	INIT_LIST_HEAD(&em->list);
	return em;
}

/**
 * free_extent_map - drop reference count of an extent_map
 * @em: extent map being released
 *
 * Drops the reference count of @em by one and frees the structure
 * if the reference count hits zero.
 */
void free_extent_map(struct extent_map *em)
{
	if (!em)
		return;
	if (refcount_dec_and_test(&em->refs)) {
		WARN_ON(extent_map_in_tree(em));
		WARN_ON(!list_empty(&em->list));
		if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
			kfree(em->map_lookup);
		kmem_cache_free(extent_map_cache, em);
	}
}
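
/*
 * Illustrative sketch (not part of the original file): the expected
 * allocate/reference/free discipline for the API above. The function name
 * demo_extent_map_lifecycle() is hypothetical.
 */
static inline int demo_extent_map_lifecycle(void)
{
	struct extent_map *em;

	em = alloc_extent_map();	/* returned with refcount == 1 */
	if (!em)
		return -ENOMEM;

	refcount_inc(&em->refs);	/* a second user takes a reference */
	free_extent_map(em);		/* drops to 1, structure still alive */
	free_extent_map(em);		/* drops to 0, structure is freed */
	return 0;
}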

/* Simple helper to do math around the end of an extent, handling wrap. */
static u64 range_end(u64 start, u64 len)
{
	if (start + len < start)
		return (u64)-1;
	return start + len;
}
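
/*
 * Worked example (illustrative, not in the original file):
 * range_end(U64_MAX - 10, 100) would wrap around, so it clamps and returns
 * U64_MAX, while range_end(4096, 4096) simply returns 8192.
 */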

static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_map *entry = NULL;
	struct rb_node *orig_parent = NULL;
	u64 end = range_end(em->start, em->len);
	bool leftmost = true;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct extent_map, rb_node);

		if (em->start < entry->start) {
			p = &(*p)->rb_left;
		} else if (em->start >= extent_map_end(entry)) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return -EEXIST;
		}
	}

	orig_parent = parent;
	while (parent && em->start >= extent_map_end(entry)) {
		parent = rb_next(parent);
		entry = rb_entry(parent, struct extent_map, rb_node);
	}
	if (parent)
		if (end > entry->start && em->start < extent_map_end(entry))
			return -EEXIST;

	parent = orig_parent;
	entry = rb_entry(parent, struct extent_map, rb_node);
	while (parent && em->start < entry->start) {
		parent = rb_prev(parent);
		entry = rb_entry(parent, struct extent_map, rb_node);
	}
	if (parent)
		if (end > entry->start && em->start < extent_map_end(entry))
			return -EEXIST;

	rb_link_node(&em->rb_node, orig_parent, p);
	rb_insert_color_cached(&em->rb_node, root, leftmost);
	return 0;
}

/*
 * Search through the tree for an extent_map with a given offset. If it can't
 * be found, try to find some neighboring extents.
 */
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
				     struct rb_node **prev_or_next_ret)
{
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *orig_prev = NULL;
	struct extent_map *entry;
	struct extent_map *prev_entry = NULL;

	ASSERT(prev_or_next_ret);

	while (n) {
		entry = rb_entry(n, struct extent_map, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset >= extent_map_end(entry))
			n = n->rb_right;
		else
			return n;
	}

	orig_prev = prev;
	while (prev && offset >= extent_map_end(prev_entry)) {
		prev = rb_next(prev);
		prev_entry = rb_entry(prev, struct extent_map, rb_node);
	}

	/*
	 * A previous extent map was found, return it through
	 * @prev_or_next_ret, as in this case the caller does not care about
	 * the next one.
	 */
	if (prev) {
		*prev_or_next_ret = prev;
		return NULL;
	}

	prev = orig_prev;
	prev_entry = rb_entry(prev, struct extent_map, rb_node);
	while (prev && offset < prev_entry->start) {
		prev = rb_prev(prev);
		prev_entry = rb_entry(prev, struct extent_map, rb_node);
	}
	*prev_or_next_ret = prev;

	return NULL;
}

/* Check to see if two extent_map structs are adjacent and safe to merge. */
static int mergable_maps(struct extent_map *prev, struct extent_map *next)
{
	if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
		return 0;

	/*
	 * Don't merge compressed extents, we need to know their
	 * actual size.
	 */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
		return 0;

	if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
	    test_bit(EXTENT_FLAG_LOGGING, &next->flags))
		return 0;

	/*
	 * We don't want to merge stuff that hasn't been written to the log yet
	 * since it may not reflect exactly what is on disk, and that would be
	 * bad.
	 */
	if (!list_empty(&prev->list) || !list_empty(&next->list))
		return 0;

	ASSERT(next->block_start != EXTENT_MAP_DELALLOC &&
	       prev->block_start != EXTENT_MAP_DELALLOC);

	if (prev->map_lookup || next->map_lookup)
		ASSERT(test_bit(EXTENT_FLAG_FS_MAPPING, &prev->flags) &&
		       test_bit(EXTENT_FLAG_FS_MAPPING, &next->flags));

	if (extent_map_end(prev) == next->start &&
	    prev->flags == next->flags &&
	    prev->map_lookup == next->map_lookup &&
	    ((next->block_start == EXTENT_MAP_HOLE &&
	      prev->block_start == EXTENT_MAP_HOLE) ||
	     (next->block_start == EXTENT_MAP_INLINE &&
	      prev->block_start == EXTENT_MAP_INLINE) ||
	     (next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
	      next->block_start == extent_map_block_end(prev)))) {
		return 1;
	}
	return 0;
}

static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
{
	struct extent_map *merge = NULL;
	struct rb_node *rb;

	/*
	 * We can't modify an extent map that is in the tree and that is being
	 * used by another task, as it can cause that other task to see it in
	 * inconsistent state during the merging. We always have 1 reference for
	 * the tree and 1 for this task (which is unpinning the extent map or
	 * clearing the logging flag), so anything > 2 means it's being used by
	 * other tasks too.
	 */
	if (refcount_read(&em->refs) > 2)
		return;

	if (em->start != 0) {
		rb = rb_prev(&em->rb_node);
		if (rb)
			merge = rb_entry(rb, struct extent_map, rb_node);
		if (rb && mergable_maps(merge, em)) {
			em->start = merge->start;
			em->orig_start = merge->orig_start;
			em->len += merge->len;
			em->block_len += merge->block_len;
			em->block_start = merge->block_start;
			em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
			em->mod_start = merge->mod_start;
			em->generation = max(em->generation, merge->generation);
			set_bit(EXTENT_FLAG_MERGED, &em->flags);

			rb_erase_cached(&merge->rb_node, &tree->map);
			RB_CLEAR_NODE(&merge->rb_node);
			free_extent_map(merge);
		}
	}

	rb = rb_next(&em->rb_node);
	if (rb)
		merge = rb_entry(rb, struct extent_map, rb_node);
	if (rb && mergable_maps(em, merge)) {
		em->len += merge->len;
		em->block_len += merge->block_len;
		rb_erase_cached(&merge->rb_node, &tree->map);
		RB_CLEAR_NODE(&merge->rb_node);
		em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
		em->generation = max(em->generation, merge->generation);
		set_bit(EXTENT_FLAG_MERGED, &em->flags);
		free_extent_map(merge);
	}
}

/**
 * unpin_extent_cache - unpin an extent from the cache
 * @tree: tree to unpin the extent in
 * @start: logical offset in the file
 * @len: length of the extent
 * @gen: generation that this extent has been modified in
 *
 * Called after an extent has been written to disk properly. Set the generation
 * to the generation that actually added the file item to the inode so we know
 * we need to sync this extent when we call fsync().
 */
int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
		       u64 gen)
{
	int ret = 0;
	struct extent_map *em;
	bool prealloc = false;

	write_lock(&tree->lock);
	em = lookup_extent_mapping(tree, start, len);

	WARN_ON(!em || em->start != start);

	if (!em)
		goto out;

	em->generation = gen;
	clear_bit(EXTENT_FLAG_PINNED, &em->flags);
	em->mod_start = em->start;
	em->mod_len = em->len;

	if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) {
		prealloc = true;
		clear_bit(EXTENT_FLAG_FILLING, &em->flags);
	}

	try_merge_map(tree, em);

	if (prealloc) {
		em->mod_start = em->start;
		em->mod_len = em->len;
	}

	free_extent_map(em);
out:
	write_unlock(&tree->lock);
	return ret;
}
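
/*
 * Illustrative sketch (not in the original file): typical use of
 * unpin_extent_cache() once an extent has been written out and its file
 * extent item committed. The surrounding helper and its parameter names are
 * hypothetical.
 */
static inline void demo_unpin_after_write(struct btrfs_inode *inode,
					  u64 file_offset, u64 num_bytes,
					  u64 transid)
{
	/*
	 * The extent map was pinned (EXTENT_FLAG_PINNED) while the write was
	 * in flight; once the file extent item is in the tree, unpin it and
	 * record the transaction generation for fsync tracking.
	 */
	unpin_extent_cache(&inode->extent_tree, file_offset, num_bytes, transid);
}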

void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
{
	lockdep_assert_held_write(&tree->lock);

	clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
	if (extent_map_in_tree(em))
		try_merge_map(tree, em);
}

static inline void setup_extent_mapping(struct extent_map_tree *tree,
					struct extent_map *em,
					int modified)
{
	refcount_inc(&em->refs);
	em->mod_start = em->start;
	em->mod_len = em->len;

	if (modified)
		list_move(&em->list, &tree->modified_extents);
	else
		try_merge_map(tree, em);
}

static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
{
	struct map_lookup *map = em->map_lookup;
	u64 stripe_size = em->orig_block_len;
	int i;

	for (i = 0; i < map->num_stripes; i++) {
		struct btrfs_io_stripe *stripe = &map->stripes[i];
		struct btrfs_device *device = stripe->dev;

		set_extent_bits_nowait(&device->alloc_state, stripe->physical,
				       stripe->physical + stripe_size - 1, bits);
	}
}

static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
{
	struct map_lookup *map = em->map_lookup;
	u64 stripe_size = em->orig_block_len;
	int i;

	for (i = 0; i < map->num_stripes; i++) {
		struct btrfs_io_stripe *stripe = &map->stripes[i];
		struct btrfs_device *device = stripe->dev;

		__clear_extent_bit(&device->alloc_state, stripe->physical,
				   stripe->physical + stripe_size - 1, bits,
				   NULL, GFP_NOWAIT, NULL);
	}
}

/**
 * Add new extent map to the extent tree
 *
 * @tree: tree to insert new map in
 * @em: map to insert
 * @modified: indicate whether the given @em should be added to the
 *	      modified list, which indicates the extent needs to be logged
 *
 * Insert @em into @tree or perform a simple forward/backward merge with
 * existing mappings. The extent_map struct passed in will be inserted
 * into the tree directly, with an additional reference taken, or a
 * reference dropped if the merge attempt was successful.
 */
int add_extent_mapping(struct extent_map_tree *tree,
		       struct extent_map *em, int modified)
{
	int ret = 0;

	lockdep_assert_held_write(&tree->lock);

	ret = tree_insert(&tree->map, em);
	if (ret)
		goto out;

	setup_extent_mapping(tree, em, modified);
	if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) {
		extent_map_device_set_bits(em, CHUNK_ALLOCATED);
		extent_map_device_clear_bits(em, CHUNK_TRIMMED);
	}
out:
	return ret;
}
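
/*
 * Illustrative sketch (not in the original file): inserting a new mapping.
 * The tree lock must be held in write mode, and on success the tree takes an
 * extra reference. demo_insert_mapping() is a hypothetical helper.
 */
static inline int demo_insert_mapping(struct extent_map_tree *tree,
				      struct extent_map *em)
{
	int ret;

	write_lock(&tree->lock);
	ret = add_extent_mapping(tree, em, 0);
	write_unlock(&tree->lock);
	/*
	 * If the caller no longer needs @em, drop the allocation reference;
	 * the tree keeps its own reference on success.
	 */
	if (ret == 0)
		free_extent_map(em);
	return ret;
}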

static struct extent_map *
__lookup_extent_mapping(struct extent_map_tree *tree,
			u64 start, u64 len, int strict)
{
	struct extent_map *em;
	struct rb_node *rb_node;
	struct rb_node *prev_or_next = NULL;
	u64 end = range_end(start, len);

	rb_node = __tree_search(&tree->map.rb_root, start, &prev_or_next);
	if (!rb_node) {
		if (prev_or_next)
			rb_node = prev_or_next;
		else
			return NULL;
	}

	em = rb_entry(rb_node, struct extent_map, rb_node);

	if (strict && !(end > em->start && start < extent_map_end(em)))
		return NULL;

	refcount_inc(&em->refs);
	return em;
}

/**
 * lookup_extent_mapping - lookup extent_map
 * @tree: tree to lookup in
 * @start: byte offset to start the search
 * @len: length of the lookup range
 *
 * Find and return the first extent_map struct in @tree that intersects the
 * [start, start + len) range. There may be additional objects in the tree
 * that intersect, so check the object returned carefully to make sure that
 * no additional lookups are needed.
 */
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
					 u64 start, u64 len)
{
	return __lookup_extent_mapping(tree, start, len, 1);
}
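
/*
 * Illustrative sketch (not in the original file): a read-locked lookup that
 * returns the mapped block start for a range. The same pattern applies to
 * search_extent_mapping() below, which may additionally return a nearby,
 * non-overlapping extent map. demo_lookup_block_start() is hypothetical.
 */
static inline u64 demo_lookup_block_start(struct extent_map_tree *tree,
					  u64 start, u64 len)
{
	struct extent_map *em;
	u64 block_start = EXTENT_MAP_HOLE;

	read_lock(&tree->lock);
	em = lookup_extent_mapping(tree, start, len);
	if (em)
		block_start = em->block_start;
	read_unlock(&tree->lock);

	if (em)
		free_extent_map(em);	/* drop the reference the lookup took */
	return block_start;
}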

/**
 * search_extent_mapping - find a nearby extent map
 * @tree: tree to lookup in
 * @start: byte offset to start the search
 * @len: length of the lookup range
 *
 * Find and return the first extent_map struct in @tree that intersects the
 * [start, start + len) range.
 *
 * If one can't be found, any nearby extent may be returned.
 */
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
					 u64 start, u64 len)
{
	return __lookup_extent_mapping(tree, start, len, 0);
}

/**
 * remove_extent_mapping - removes an extent_map from the extent tree
 * @tree: extent tree to remove from
 * @em: extent map being removed
 *
 * Removes @em from @tree. No reference counts are dropped, and no checks
 * are done to see if the range is in use.
 */
void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
	lockdep_assert_held_write(&tree->lock);

	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
	rb_erase_cached(&em->rb_node, &tree->map);
	if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
		list_del_init(&em->list);
	if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
		extent_map_device_clear_bits(em, CHUNK_ALLOCATED);
	RB_CLEAR_NODE(&em->rb_node);
}
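
/*
 * Illustrative sketch (not in the original file): removing a mapping and
 * dropping the tree's reference, under the write lock. The helper name
 * demo_remove_mapping() is hypothetical.
 */
static inline void demo_remove_mapping(struct extent_map_tree *tree,
				       struct extent_map *em)
{
	write_lock(&tree->lock);
	remove_extent_mapping(tree, em);
	write_unlock(&tree->lock);
	/* remove_extent_mapping() drops no references, so drop the tree's. */
	free_extent_map(em);
}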

void replace_extent_mapping(struct extent_map_tree *tree,
			    struct extent_map *cur,
			    struct extent_map *new,
			    int modified)
{
	lockdep_assert_held_write(&tree->lock);

	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
	ASSERT(extent_map_in_tree(cur));
	if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
		list_del_init(&cur->list);
	rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->map);
	RB_CLEAR_NODE(&cur->rb_node);

	setup_extent_mapping(tree, new, modified);
}

static struct extent_map *next_extent_map(const struct extent_map *em)
{
	struct rb_node *next;

	next = rb_next(&em->rb_node);
	if (!next)
		return NULL;
	return container_of(next, struct extent_map, rb_node);
}

/*
 * Get the extent map that immediately follows another one.
 *
 * @tree: The extent map tree that the extent map belongs to.
 *        Holding read or write access on the tree's lock is required.
 * @em:   An extent map from the given tree. The caller must ensure that
 *        between getting @em and calling this function, the extent map
 *        @em is not removed from the tree - for example, by holding the
 *        tree's lock for the duration of those two operations.
 *
 * Returns the extent map that immediately follows @em, or NULL if @em is the
 * last extent map in the tree.
 */
struct extent_map *btrfs_next_extent_map(const struct extent_map_tree *tree,
					 const struct extent_map *em)
{
	struct extent_map *next;

	/* The lock must be acquired either in read mode or write mode. */
	lockdep_assert_held(&tree->lock);
	ASSERT(extent_map_in_tree(em));

	next = next_extent_map(em);
	if (next)
		refcount_inc(&next->refs);

	return next;
}
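
/*
 * Illustrative sketch (not in the original file): safely stepping to the
 * following extent map, holding the reference taken by the helper across the
 * lock drop. demo_peek_next() is hypothetical; @em must be referenced and
 * still in the tree when this is called.
 */
static inline struct extent_map *demo_peek_next(struct extent_map_tree *tree,
						struct extent_map *em)
{
	struct extent_map *next;

	read_lock(&tree->lock);
	/* Returns the next map with an extra reference taken, or NULL. */
	next = btrfs_next_extent_map(tree, em);
	read_unlock(&tree->lock);

	return next;	/* caller must free_extent_map() when done */
}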

static struct extent_map *prev_extent_map(struct extent_map *em)
{
	struct rb_node *prev;

	prev = rb_prev(&em->rb_node);
	if (!prev)
		return NULL;
	return container_of(prev, struct extent_map, rb_node);
}

/*
 * Helper for btrfs_get_extent. Given an existing extent in the tree (the
 * extent nearest to map_start) and a new extent to insert, deal with the
 * overlap and insert the best-fitting new extent into the tree.
 */
static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
					 struct extent_map *existing,
					 struct extent_map *em,
					 u64 map_start)
{
	struct extent_map *prev;
	struct extent_map *next;
	u64 start;
	u64 end;
	u64 start_diff;

	BUG_ON(map_start < em->start || map_start >= extent_map_end(em));

	if (existing->start > map_start) {
		next = existing;
		prev = prev_extent_map(next);
	} else {
		prev = existing;
		next = next_extent_map(prev);
	}

	start = prev ? extent_map_end(prev) : em->start;
	start = max_t(u64, start, em->start);
	end = next ? next->start : extent_map_end(em);
	end = min_t(u64, end, extent_map_end(em));
	start_diff = start - em->start;
	em->start = start;
	em->len = end - start;
	if (em->block_start < EXTENT_MAP_LAST_BYTE &&
	    !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
		em->block_start += start_diff;
		em->block_len = em->len;
	}
	return add_extent_mapping(em_tree, em, 0);
}

/**
 * Add extent mapping into em_tree
 *
 * @fs_info: the filesystem
 * @em_tree: extent tree into which we want to insert the extent mapping
 * @em_in: extent we are inserting
 * @start: start of the logical range btrfs_get_extent() is requesting
 * @len: length of the logical range btrfs_get_extent() is requesting
 *
 * Note that @em_in's range may be different from [start, start+len),
 * but they must overlap.
 *
 * Insert @em_in into @em_tree. In case there is an overlapping range, handle
 * the -EEXIST by either:
 * a) Returning the existing extent in @em_in if @start is within the
 *    existing em.
 * b) Merge the existing extent with @em_in passed in.
 *
 * Return 0 on success, otherwise -EEXIST.
 */
int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
			     struct extent_map_tree *em_tree,
			     struct extent_map **em_in, u64 start, u64 len)
{
	int ret;
	struct extent_map *em = *em_in;

	ret = add_extent_mapping(em_tree, em, 0);
	/*
	 * It is possible that someone inserted the extent into the tree
	 * while we had the lock dropped. It is also possible that
	 * an overlapping map exists in the tree.
	 */
	if (ret == -EEXIST) {
		struct extent_map *existing;

		ret = 0;

		existing = search_extent_mapping(em_tree, start, len);

		trace_btrfs_handle_em_exist(fs_info, existing, em, start, len);

		/*
		 * existing will always be non-NULL, since there must be an
		 * extent causing the -EEXIST.
		 */
		if (start >= existing->start &&
		    start < extent_map_end(existing)) {
			free_extent_map(em);
			*em_in = existing;
			ret = 0;
		} else {
			u64 orig_start = em->start;
			u64 orig_len = em->len;

			/*
			 * The existing extent map is the one nearest to
			 * the [start, start + len) range which overlaps.
			 */
			ret = merge_extent_mapping(em_tree, existing,
						   em, start);
			if (ret) {
				free_extent_map(em);
				*em_in = NULL;
				WARN_ONCE(ret,
"unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n",
					  ret, existing->start, existing->len,
					  orig_start, orig_len);
			}
			free_extent_map(existing);
		}
	}

	ASSERT(ret == 0 || ret == -EEXIST);
	return ret;
}
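
/*
 * Illustrative sketch (not in the original file): how a btrfs_get_extent()
 * style caller uses btrfs_add_extent_mapping(), which may replace @em with
 * the already-existing overlapping map. demo_cache_mapping() is hypothetical.
 */
static inline struct extent_map *demo_cache_mapping(struct btrfs_fs_info *fs_info,
						    struct extent_map_tree *tree,
						    struct extent_map *em,
						    u64 start, u64 len)
{
	int ret;

	write_lock(&tree->lock);
	ret = btrfs_add_extent_mapping(fs_info, tree, &em, start, len);
	write_unlock(&tree->lock);
	/* On success, @em is either our map or the pre-existing one. */
	return ret ? NULL : em;
}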

/*
 * Drop all extent maps from a tree in the fastest possible way, rescheduling
 * if needed. This avoids searching the tree, from the root down to the first
 * extent map, before each deletion.
 */
static void drop_all_extent_maps_fast(struct extent_map_tree *tree)
{
	write_lock(&tree->lock);
	while (!RB_EMPTY_ROOT(&tree->map.rb_root)) {
		struct extent_map *em;
		struct rb_node *node;

		node = rb_first_cached(&tree->map);
		em = rb_entry(node, struct extent_map, rb_node);
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
		remove_extent_mapping(tree, em);
		free_extent_map(em);
		cond_resched_rwlock_write(&tree->lock);
	}
	write_unlock(&tree->lock);
}

/*
 * Drop all extent maps in a given range.
 *
 * @inode: The target inode.
 * @start: Start offset of the range.
 * @end: End offset of the range (inclusive value).
 * @skip_pinned: Indicate if pinned extent maps should be ignored or not.
 *
 * This drops all the extent maps that intersect the given range [@start, @end].
 * Extent maps that partially overlap the range and extend before or beyond it
 * are split.
 * The caller should have locked an appropriate file range in the inode's io
 * tree before calling this function.
 */
void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
				 bool skip_pinned)
{
	struct extent_map *split;
	struct extent_map *split2;
	struct extent_map *em;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	u64 len = end - start + 1;

	WARN_ON(end < start);
	if (end == (u64)-1) {
		if (start == 0 && !skip_pinned) {
			drop_all_extent_maps_fast(em_tree);
			return;
		}
		len = (u64)-1;
	} else {
		/* Make end offset exclusive for use in the loop below. */
		end++;
	}

	/*
	 * It's ok if we fail to allocate the extent maps, see the comment near
	 * the bottom of the loop below. We only need two spare extent maps in
	 * the worst case, where the first extent map that intersects our range
	 * starts before the range and the last extent map that intersects our
	 * range ends after our range (and they might be the same extent map),
	 * because we need to split those two extent maps at the boundaries.
	 */
	split = alloc_extent_map();
	split2 = alloc_extent_map();

	write_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);

	while (em) {
		/* extent_map_end() returns exclusive value (last byte + 1). */
		const u64 em_end = extent_map_end(em);
		struct extent_map *next_em = NULL;
		u64 gen;
		unsigned long flags;
		bool modified;
		bool compressed;

		if (em_end < end) {
			next_em = next_extent_map(em);
			if (next_em) {
				if (next_em->start < end)
					refcount_inc(&next_em->refs);
				else
					next_em = NULL;
			}
		}

		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			start = em_end;
			if (end != (u64)-1)
				len = start + len - em_end;
			goto next;
		}

		/*
		 * Snapshot the flags before clearing any bits: the split
		 * extent maps created below inherit this snapshot, and the
		 * snapshot must be taken before it is modified.
		 */
		flags = em->flags;
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		/* Don't carry the logging flag over to the split maps. */
		clear_bit(EXTENT_FLAG_LOGGING, &flags);
		modified = !list_empty(&em->list);

		/*
		 * The extent map does not cross our target range, so no need to
		 * split it, we can remove it directly.
		 */
		if (em->start >= start && em_end <= end)
			goto remove_em;

		gen = em->generation;
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);

		if (em->start < start) {
			if (!split) {
				split = split2;
				split2 = NULL;
				if (!split)
					goto remove_em;
			}
			split->start = em->start;
			split->len = start - em->start;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_start = em->orig_start;
				split->block_start = em->block_start;

				if (compressed)
					split->block_len = em->block_len;
				else
					split->block_len = split->len;
				split->orig_block_len = max(split->block_len,
							    em->orig_block_len);
				split->ram_bytes = em->ram_bytes;
			} else {
				split->orig_start = split->start;
				split->block_len = 0;
				split->block_start = em->block_start;
				split->orig_block_len = 0;
				split->ram_bytes = split->len;
			}

			split->generation = gen;
			split->flags = flags;
			split->compress_type = em->compress_type;
			replace_extent_mapping(em_tree, em, split, modified);
			free_extent_map(split);
			split = split2;
			split2 = NULL;
		}
		if (em_end > end) {
			if (!split) {
				split = split2;
				split2 = NULL;
				if (!split)
					goto remove_em;
			}
			split->start = start + len;
			split->len = em_end - (start + len);
			split->block_start = em->block_start;
			split->flags = flags;
			split->compress_type = em->compress_type;
			split->generation = gen;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_block_len = max(em->block_len,
							    em->orig_block_len);

				split->ram_bytes = em->ram_bytes;
				if (compressed) {
					split->block_len = em->block_len;
					split->orig_start = em->orig_start;
				} else {
					const u64 diff = start + len - em->start;

					split->block_len = split->len;
					split->block_start += diff;
					split->orig_start = em->orig_start;
				}
			} else {
				split->ram_bytes = split->len;
				split->orig_start = split->start;
				split->block_len = 0;
				split->orig_block_len = 0;
			}

			if (extent_map_in_tree(em)) {
				replace_extent_mapping(em_tree, em, split,
						       modified);
			} else {
				int ret;

				ret = add_extent_mapping(em_tree, split,
							 modified);
				/* Logic error, shouldn't happen. */
				ASSERT(ret == 0);
				if (WARN_ON(ret != 0) && modified)
					btrfs_set_inode_full_sync(inode);
			}
			free_extent_map(split);
			split = NULL;
		}
remove_em:
		if (extent_map_in_tree(em)) {
			/*
			 * If the extent map is still in the tree it means that
			 * either of the following is true:
			 *
			 * 1) It fits entirely in our range (doesn't end beyond
			 *    it or starts before it);
			 *
			 * 2) It starts before our range and/or ends after our
			 *    range, and we were not able to allocate the extent
			 *    maps for split operations, @split and @split2.
			 *
			 * If we are at case 2) then we just remove the entire
			 * extent map - this is fine since if anyone needs to
			 * access the subranges outside our range, it will just
			 * load it again from the subvolume tree's file extent
			 * item. However if the extent map was in the list of
			 * modified extents, then we must mark the inode for a
			 * full fsync, otherwise a fast fsync will miss this
			 * extent if it's new and needs to be logged.
			 */
			if ((em->start < start || em_end > end) && modified) {
				ASSERT(!split);
				btrfs_set_inode_full_sync(inode);
			}
			remove_extent_mapping(em_tree, em);
		}

		/*
		 * Once for the tree reference (we replaced or removed the
		 * extent map from the tree).
		 */
		free_extent_map(em);
next:
		/* Once for us (for our lookup reference). */
		free_extent_map(em);

		em = next_em;
	}

	write_unlock(&em_tree->lock);

	free_extent_map(split);
	free_extent_map(split2);
}
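
/*
 * Illustrative sketch (not in the original file): dropping the cached
 * mappings for a range that is about to be overwritten, e.g. from a write
 * path. The helper and its offsets are hypothetical.
 */
static inline void demo_drop_range(struct btrfs_inode *inode)
{
	const u64 start = 0;
	const u64 end = SZ_1M - 1;	/* inclusive end offset */

	/* The caller is expected to hold the io tree range [start, end] locked. */
	btrfs_drop_extent_map_range(inode, start, end, false);
}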

/*
 * Replace a range in the inode's extent map tree with a new extent map.
 *
 * @inode: The target inode.
 * @new_em: The new extent map to add to the inode's extent map tree.
 * @modified: Indicate if the new extent map should be added to the list of
 *            modified extents (for fast fsync tracking).
 *
 * Drops all the extent maps in the inode's extent map tree that intersect the
 * range of the new extent map and adds the new extent map to the tree.
 * The caller should have locked an appropriate file range in the inode's io
 * tree before calling this function.
 */
int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
				   struct extent_map *new_em,
				   bool modified)
{
	const u64 end = new_em->start + new_em->len - 1;
	struct extent_map_tree *tree = &inode->extent_tree;
	int ret;

	ASSERT(!extent_map_in_tree(new_em));

	/*
	 * The caller has locked an appropriate file range in the inode's io
	 * tree, but getting -EEXIST when adding the new extent map can still
	 * happen in case there are extents that partially cover the range, and
	 * this is due to two tasks operating on different parts of the extent.
	 * See commit 18e83ac75bfe67 ("Btrfs: fix unexpected EEXIST from
	 * btrfs_get_extent") for an example and details.
	 */
	do {
		btrfs_drop_extent_map_range(inode, new_em->start, end, false);
		write_lock(&tree->lock);
		ret = add_extent_mapping(tree, new_em, modified);
		write_unlock(&tree->lock);
	} while (ret == -EEXIST);

	return ret;
}
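
/*
 * Illustrative sketch (not in the original file): caching a freshly created
 * mapping with btrfs_replace_extent_map_range(). The helper and all field
 * values here are hypothetical placeholders.
 */
static inline int demo_replace_range(struct btrfs_inode *inode,
				     u64 start, u64 len, u64 disk_bytenr)
{
	struct extent_map *em;
	int ret;

	em = alloc_extent_map();
	if (!em)
		return -ENOMEM;
	em->start = start;
	em->len = len;
	em->block_start = disk_bytenr;
	em->block_len = len;

	/* Any old overlapping maps are dropped, then @em is inserted. */
	ret = btrfs_replace_extent_map_range(inode, em, true);
	/* Drop our allocation reference; the tree holds its own on success. */
	free_extent_map(em);
	return ret;
}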