dm space maps: improve performance with inc/dec on ranges of blocks
[linux-block.git] / drivers / md / persistent-data / dm-space-map-common.c
CommitLineData
3241b1d3
JT
1/*
2 * Copyright (C) 2011 Red Hat, Inc.
3 *
4 * This file is released under the GPL.
5 */
6
7#include "dm-space-map-common.h"
8#include "dm-transaction-manager.h"
be500ed7 9#include "dm-btree-internal.h"
3241b1d3
JT
10
11#include <linux/bitops.h>
12#include <linux/device-mapper.h>
13
14#define DM_MSG_PREFIX "space map common"
15
16/*----------------------------------------------------------------*/
17
18/*
19 * Index validator.
20 */
21#define INDEX_CSUM_XOR 160478
22
23static void index_prepare_for_write(struct dm_block_validator *v,
24 struct dm_block *b,
25 size_t block_size)
26{
27 struct disk_metadata_index *mi_le = dm_block_data(b);
28
29 mi_le->blocknr = cpu_to_le64(dm_block_location(b));
30 mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
31 block_size - sizeof(__le32),
32 INDEX_CSUM_XOR));
33}
34
/*
 * Verifies an index block on read: the recorded block number must match
 * the location it was read from, and the checksum must match the data.
 */
static int index_check(struct dm_block_validator *v,
		       struct dm_block *b,
		       size_t block_size)
{
	struct disk_metadata_index *mi_le = dm_block_data(b);
	__le32 csum_disk;

	/* A mismatched blocknr means we read a block from the wrong place. */
	if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
		DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu",
			    le64_to_cpu(mi_le->blocknr), dm_block_location(b));
		return -ENOTBLK;
	}

	/* Checksum covers everything after the csum field itself. */
	csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
					       block_size - sizeof(__le32),
					       INDEX_CSUM_XOR));
	if (csum_disk != mi_le->csum) {
		DMERR_LIMIT("index_check failed: csum %u != wanted %u",
			    le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
		return -EILSEQ;
	}

	return 0;
}
59
/* Block-manager validator for metadata index blocks. */
static struct dm_block_validator index_validator = {
	.name = "index",
	.prepare_for_write = index_prepare_for_write,
	.check = index_check
};
65
66/*----------------------------------------------------------------*/
67
68/*
69 * Bitmap validator
70 */
71#define BITMAP_CSUM_XOR 240779
72
5cc9cdf6
AS
73static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
74 struct dm_block *b,
75 size_t block_size)
3241b1d3
JT
76{
77 struct disk_bitmap_header *disk_header = dm_block_data(b);
78
79 disk_header->blocknr = cpu_to_le64(dm_block_location(b));
80 disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
81 block_size - sizeof(__le32),
82 BITMAP_CSUM_XOR));
83}
84
/*
 * Verifies a bitmap block on read: recorded location and checksum must
 * both match, mirroring index_check() above.
 */
static int dm_bitmap_check(struct dm_block_validator *v,
			   struct dm_block *b,
			   size_t block_size)
{
	struct disk_bitmap_header *disk_header = dm_block_data(b);
	__le32 csum_disk;

	if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
		DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
			    le64_to_cpu(disk_header->blocknr), dm_block_location(b));
		return -ENOTBLK;
	}

	/* Checksum covers everything after the csum field itself. */
	csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
					       block_size - sizeof(__le32),
					       BITMAP_CSUM_XOR));
	if (csum_disk != disk_header->csum) {
		DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
			    le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
		return -EILSEQ;
	}

	return 0;
}
109
/* Block-manager validator for space-map bitmap blocks. */
static struct dm_block_validator dm_sm_bitmap_validator = {
	.name = "sm_bitmap",
	.prepare_for_write = dm_bitmap_prepare_for_write,
	.check = dm_bitmap_check,
};
115
116/*----------------------------------------------------------------*/
117
118#define ENTRIES_PER_WORD 32
119#define ENTRIES_SHIFT 5
120
/* Returns a pointer to the bitmap payload, just past the header. */
static void *dm_bitmap_data(struct dm_block *b)
{
	return dm_block_data(b) + sizeof(struct disk_bitmap_header);
}
125
126#define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
127
/*
 * Used by sm_find_free() to skip whole 64-bit words that contain no
 * free entries.  The bit trick appears to flag words in which every
 * 2-bit entry is non-zero — NOTE(review): verify the arithmetic before
 * relying on this description.
 */
static unsigned dm_bitmap_word_used(void *addr, unsigned b)
{
	__le64 *words_le = addr;
	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);

	uint64_t bits = le64_to_cpu(*w_le);
	uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;

	return !(~bits & mask);
}
138
/*
 * Returns the 2-bit entry (0..3) for block b.  Each entry is stored as
 * a (hi, lo) bit pair within little-endian 64-bit words.
 */
static unsigned sm_lookup_bitmap(void *addr, unsigned b)
{
	__le64 *words_le = addr;
	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
	unsigned hi, lo;

	/* Each entry occupies two adjacent bits. */
	b = (b & (ENTRIES_PER_WORD - 1)) << 1;
	hi = !!test_bit_le(b, (void *) w_le);
	lo = !!test_bit_le(b + 1, (void *) w_le);
	return (hi << 1) | lo;
}
150
/*
 * Stores the 2-bit value val (0..3) for block b, the inverse of
 * sm_lookup_bitmap().
 */
static void sm_set_bitmap(void *addr, unsigned b, unsigned val)
{
	__le64 *words_le = addr;
	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);

	b = (b & (ENTRIES_PER_WORD - 1)) << 1;

	/* High bit of the pair. */
	if (val & 2)
		__set_bit_le(b, (void *) w_le);
	else
		__clear_bit_le(b, (void *) w_le);

	/* Low bit of the pair. */
	if (val & 1)
		__set_bit_le(b + 1, (void *) w_le);
	else
		__clear_bit_le(b + 1, (void *) w_le);
}
168
/*
 * Linear scan of [begin, end) for a zero (free) entry, skipping whole
 * words via dm_bitmap_word_used() when aligned.  Returns 0 and the
 * entry index in *result, or -ENOSPC if none is free.
 */
static int sm_find_free(void *addr, unsigned begin, unsigned end,
			unsigned *result)
{
	while (begin < end) {
		/* On a word boundary we can reject 32 entries at once. */
		if (!(begin & (ENTRIES_PER_WORD - 1)) &&
		    dm_bitmap_word_used(addr, begin)) {
			begin += ENTRIES_PER_WORD;
			continue;
		}

		if (!sm_lookup_bitmap(addr, begin)) {
			*result = begin;
			return 0;
		}

		begin++;
	}

	return -ENOSPC;
}
189
190/*----------------------------------------------------------------*/
191
/*
 * Common initialisation shared by the metadata and disk flavours:
 * zeroes the ll_disk, wires up the two btree infos and derives the
 * number of 2-bit entries that fit in one bitmap block.
 */
static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
{
	memset(ll, 0, sizeof(struct ll_disk));

	ll->tm = tm;

	ll->bitmap_info.tm = tm;
	ll->bitmap_info.levels = 1;

	/*
	 * Because the new bitmap blocks are created via a shadow
	 * operation, the old entry has already had its reference count
	 * decremented and we don't need the btree to do any bookkeeping.
	 */
	ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
	ll->bitmap_info.value_type.inc = NULL;
	ll->bitmap_info.value_type.dec = NULL;
	ll->bitmap_info.value_type.equal = NULL;

	ll->ref_count_info.tm = tm;
	ll->ref_count_info.levels = 1;
	ll->ref_count_info.value_type.size = sizeof(uint32_t);
	ll->ref_count_info.value_type.inc = NULL;
	ll->ref_count_info.value_type.dec = NULL;
	ll->ref_count_info.value_type.equal = NULL;

	ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));

	if (ll->block_size > (1 << 30)) {
		DMERR("block size too big to hold bitmaps");
		return -EINVAL;
	}

	/* Payload after the header, ENTRIES_PER_BYTE 2-bit entries per byte. */
	ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
		ENTRIES_PER_BYTE;
	ll->nr_blocks = 0;
	ll->bitmap_root = 0;
	ll->ref_count_root = 0;
	ll->bitmap_index_changed = false;

	return 0;
}
234
/*
 * Grows the space map by extra_blocks, allocating any newly required
 * bitmap blocks and recording an all-free index entry for each.
 */
int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
{
	int r;
	dm_block_t i, nr_blocks, nr_indexes;
	unsigned old_blocks, blocks;

	nr_blocks = ll->nr_blocks + extra_blocks;
	old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
	blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);

	/*
	 * Same division as 'blocks' above, but done in 64 bits
	 * (dm_block_t, rather than unsigned) for the overflow check.
	 */
	nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
	if (nr_indexes > ll->max_entries(ll)) {
		DMERR("space map too large");
		return -EINVAL;
	}

	/*
	 * We need to set this before the dm_tm_new_block() call below.
	 */
	ll->nr_blocks = nr_blocks;
	for (i = old_blocks; i < blocks; i++) {
		struct dm_block *b;
		struct disk_index_entry idx;

		r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
		if (r < 0)
			return r;

		idx.blocknr = cpu_to_le64(dm_block_location(b));

		dm_tm_unlock(ll->tm, b);

		/* Freshly created bitmap: every entry is free. */
		idx.nr_free = cpu_to_le32(ll->entries_per_block);
		idx.none_free_before = 0;

		r = ll->save_ie(ll, i, &idx);
		if (r < 0)
			return r;
	}

	return 0;
}
277
278int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
279{
280 int r;
281 dm_block_t index = b;
282 struct disk_index_entry ie_disk;
283 struct dm_block *blk;
284
285 b = do_div(index, ll->entries_per_block);
286 r = ll->load_ie(ll, index, &ie_disk);
287 if (r < 0)
288 return r;
289
290 r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
291 &dm_sm_bitmap_validator, &blk);
292 if (r < 0)
293 return r;
294
295 *result = sm_lookup_bitmap(dm_bitmap_data(blk), b);
296
4c7da06f
MP
297 dm_tm_unlock(ll->tm, blk);
298
299 return 0;
3241b1d3
JT
300}
301
f722063e
JT
302static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
303 uint32_t *result)
3241b1d3
JT
304{
305 __le32 le_rc;
f722063e 306 int r;
3241b1d3
JT
307
308 r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
309 if (r < 0)
310 return r;
311
312 *result = le32_to_cpu(le_rc);
313
314 return r;
315}
316
f722063e
JT
317int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
318{
319 int r = sm_ll_lookup_bitmap(ll, b, result);
320
321 if (r)
322 return r;
323
324 if (*result != 3)
325 return r;
326
327 return sm_ll_lookup_big_ref_count(ll, b, result);
328}
329
/*
 * Scans the half-open block range [begin, end) for a free entry,
 * returning its absolute block number in *result, or -ENOSPC if every
 * entry in the range is in use.
 */
int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
			  dm_block_t end, dm_block_t *result)
{
	int r;
	struct disk_index_entry ie_disk;
	dm_block_t i, index_begin = begin;
	dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);

	/*
	 * FIXME: Use shifts
	 */
	/* begin/end become bit offsets within the first/last bitmaps. */
	begin = do_div(index_begin, ll->entries_per_block);
	end = do_div(end, ll->entries_per_block);
	if (end == 0)
		/* end fell exactly on a bitmap boundary. */
		end = ll->entries_per_block;

	/* begin only applies to the first bitmap; reset to 0 after it. */
	for (i = index_begin; i < index_end; i++, begin = 0) {
		struct dm_block *blk;
		unsigned position;
		uint32_t bit_end;

		r = ll->load_ie(ll, i, &ie_disk);
		if (r < 0)
			return r;

		/* Skip bitmaps with nothing free at all. */
		if (le32_to_cpu(ie_disk.nr_free) == 0)
			continue;

		r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
				    &dm_sm_bitmap_validator, &blk);
		if (r < 0)
			return r;

		/* Only the final bitmap is bounded by 'end'. */
		bit_end = (i == index_end - 1) ? end : ll->entries_per_block;

		r = sm_find_free(dm_bitmap_data(blk),
				 max_t(unsigned, begin, le32_to_cpu(ie_disk.none_free_before)),
				 bit_end, &position);
		if (r == -ENOSPC) {
			/*
			 * This might happen because we started searching
			 * part way through the bitmap.
			 */
			dm_tm_unlock(ll->tm, blk);
			continue;
		}

		dm_tm_unlock(ll->tm, blk);

		*result = i * ll->entries_per_block + (dm_block_t) position;
		return 0;
	}

	return -ENOSPC;
}
385
/*
 * Finds a block that is free in both the previous (old_ll) and current
 * (new_ll) transactions, so allocating it cannot trample data the
 * committed transaction still references.
 *
 * NOTE(review): the 'end' parameter is unused — the search always runs
 * to new_ll->nr_blocks.  Confirm that is intended.
 */
int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
				 dm_block_t begin, dm_block_t end, dm_block_t *b)
{
	int r;
	uint32_t count;

	do {
		r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
		if (r)
			break;

		/* double check this block wasn't used in the old transaction */
		if (*b >= old_ll->nr_blocks)
			count = 0;
		else {
			r = sm_ll_lookup(old_ll, *b, &count);
			if (r)
				break;

			/* Still referenced by the old transaction: retry past it. */
			if (count)
				begin = *b + 1;
		}
	} while (count);

	return r;
}
412
be500ed7
JT
413/*----------------------------------------------------------------*/
414
/*
 * Sets the reference count of block b to exactly ref_count.  Counts
 * 0-2 are held in the 2-bit bitmap entry; the sentinel 3 redirects to
 * the overflow btree.  *nr_allocations is set to +1, -1 or 0 depending
 * on whether the block changed free->used, used->free, or neither.
 */
int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
		 uint32_t ref_count, int32_t *nr_allocations)
{
	int r;
	uint32_t bit, old;
	struct dm_block *nb;
	dm_block_t index = b;
	struct disk_index_entry ie_disk;
	void *bm_le;
	int inc;

	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ie_disk);
	if (r < 0)
		return r;

	/* Shadow the bitmap block so the update is transactional. */
	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
			       &dm_sm_bitmap_validator, &nb, &inc);
	if (r < 0) {
		DMERR("dm_tm_shadow_block() failed");
		return r;
	}
	ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
	bm_le = dm_bitmap_data(nb);

	old = sm_lookup_bitmap(bm_le, bit);
	if (old > 2) {
		/* The real old count lives in the overflow btree. */
		r = sm_ll_lookup_big_ref_count(ll, b, &old);
		if (r < 0) {
			dm_tm_unlock(ll->tm, nb);
			return r;
		}
	}

	/*
	 * NOTE(review): r appears to be zero on every path reaching here,
	 * so this guard looks redundant — confirm before removing.
	 */
	if (r) {
		dm_tm_unlock(ll->tm, nb);
		return r;
	}

	if (ref_count <= 2) {
		sm_set_bitmap(bm_le, bit, ref_count);
		dm_tm_unlock(ll->tm, nb);

		/* Count left the overflow range: drop the stale btree entry. */
		if (old > 2) {
			r = dm_btree_remove(&ll->ref_count_info,
					    ll->ref_count_root,
					    &b, &ll->ref_count_root);
			if (r)
				return r;
		}

	} else {
		__le32 le_rc = cpu_to_le32(ref_count);

		/* Mark the bitmap entry as "see overflow btree". */
		sm_set_bitmap(bm_le, bit, 3);
		dm_tm_unlock(ll->tm, nb);

		__dm_bless_for_disk(&le_rc);
		r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
				    &b, &le_rc, &ll->ref_count_root);
		if (r < 0) {
			DMERR("ref count insert failed");
			return r;
		}
	}

	if (ref_count && !old) {
		/* free -> used */
		*nr_allocations = 1;
		ll->nr_allocated++;
		le32_add_cpu(&ie_disk.nr_free, -1);
		if (le32_to_cpu(ie_disk.none_free_before) == bit)
			ie_disk.none_free_before = cpu_to_le32(bit + 1);

	} else if (old && !ref_count) {
		/* used -> free */
		*nr_allocations = -1;
		ll->nr_allocated--;
		le32_add_cpu(&ie_disk.nr_free, 1);
		ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
	} else
		*nr_allocations = 0;

	return ll->save_ie(ll, index, &ie_disk);
}
498
be500ed7
JT
499/*----------------------------------------------------------------*/
500
/*
 * Holds useful intermediate results for the range based inc and dec
 * operations.
 */
struct inc_context {
	struct disk_index_entry ie_disk;	/* index entry being updated */
	struct dm_block *bitmap_block;		/* write-locked bitmap block, or NULL */
	void *bitmap;				/* payload of bitmap_block */

	struct dm_block *overflow_leaf;		/* cached overflow btree leaf, or NULL */
};
512
/* Starts a context holding no locked blocks. */
static inline void init_inc_context(struct inc_context *ic)
{
	ic->bitmap_block = NULL;
	ic->bitmap = NULL;
	ic->overflow_leaf = NULL;
}
519
/* Releases whichever blocks the context currently holds locked. */
static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
	if (ic->bitmap_block)
		dm_tm_unlock(ll->tm, ic->bitmap_block);
	if (ic->overflow_leaf)
		dm_tm_unlock(ll->tm, ic->overflow_leaf);
}
527
/* Drops all held locks and returns the context to its empty state. */
static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
	exit_inc_context(ll, ic);
	init_inc_context(ic);
}
533
/*
 * Confirms a btree node contains a particular key at an index.
 */
static bool contains_key(struct btree_node *n, uint64_t key, int index)
{
	return index >= 0 &&
		index < le32_to_cpu(n->header.nr_entries) &&
		le64_to_cpu(n->keys[index]) == key;
}
543
/*
 * Increments the overflow btree entry for block b, first fetching the
 * leaf that holds it.  The leaf is left locked in ic->overflow_leaf so
 * neighbouring increments can reuse it (see sm_ll_inc_overflow()).
 */
static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
{
	int r;
	int index;
	struct btree_node *n;
	__le32 *v_ptr;
	uint32_t rc;

	/*
	 * bitmap_block needs to be unlocked because getting the
	 * overflow_leaf may need to allocate, and thus use the space map.
	 */
	reset_inc_context(ll, ic);

	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
	if (r < 0)
		return r;

	n = dm_block_data(ic->overflow_leaf);

	if (!contains_key(n, b, index)) {
		DMERR("overflow btree is missing an entry");
		return -EINVAL;
	}

	/* Bump the count in place within the leaf. */
	v_ptr = value_ptr(n, index);
	rc = le32_to_cpu(*v_ptr) + 1;
	*v_ptr = cpu_to_le32(rc);

	return 0;
}
3241b1d3 576
be500ed7
JT
/*
 * Fast path for overflow increments: if the cached leaf already covers
 * block b, bump it directly; otherwise fall back to the full lookup.
 */
static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
{
	int index;
	struct btree_node *n;
	__le32 *v_ptr;
	uint32_t rc;

	/*
	 * Do we already have the correct overflow leaf?
	 */
	if (ic->overflow_leaf) {
		n = dm_block_data(ic->overflow_leaf);
		index = lower_bound(n, b);
		if (contains_key(n, b, index)) {
			v_ptr = value_ptr(n, index);
			rc = le32_to_cpu(*v_ptr) + 1;
			*v_ptr = cpu_to_le32(rc);

			return 0;
		}
	}

	return __sm_ll_inc_overflow(ll, b, ic);
}
601
/*
 * Shadows the bitmap block named by ic->ie_disk, pointing the index
 * entry at the (possibly relocated) copy and caching the payload
 * pointer in ic->bitmap.
 */
static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
{
	int r, inc;
	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
			       &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
	if (r < 0) {
		DMERR("dm_tm_shadow_block() failed");
		return r;
	}
	ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
	ic->bitmap = dm_bitmap_data(ic->bitmap_block);
	return 0;
}
615
be500ed7
JT
616/*
617 * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
618 * we can reopen the bitmap with a simple write lock, rather than re calling
619 * dm_tm_shadow_block().
620 */
621static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
3241b1d3 622{
be500ed7
JT
623 if (!ic->bitmap_block) {
624 int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
625 &dm_sm_bitmap_validator, &ic->bitmap_block);
626 if (r) {
627 DMERR("unable to re-get write lock for bitmap");
628 return r;
629 }
630 ic->bitmap = dm_bitmap_data(ic->bitmap_block);
631 }
632
5b564d80 633 return 0;
f722063e 634}
3241b1d3 635
be500ed7
JT
/*
 * Loops round incrementing entries in a single bitmap.
 * Returns via *new_b the first block NOT processed, so the caller can
 * continue the range in the next bitmap.
 */
static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
				   uint32_t bit, uint32_t bit_end,
				   int32_t *nr_allocations, dm_block_t *new_b,
				   struct inc_context *ic)
{
	int r;
	__le32 le_rc;
	uint32_t old;

	for (; bit != bit_end; bit++, b++) {
		/*
		 * We only need to drop the bitmap if we need to find a new btree
		 * leaf for the overflow. So if it was dropped last iteration,
		 * we now re-get it.
		 */
		r = ensure_bitmap(ll, ic);
		if (r)
			return r;

		old = sm_lookup_bitmap(ic->bitmap, bit);
		switch (old) {
		case 0:
			/* inc bitmap, adjust nr_allocated */
			sm_set_bitmap(ic->bitmap, bit, 1);
			(*nr_allocations)++;
			ll->nr_allocated++;
			le32_add_cpu(&ic->ie_disk.nr_free, -1);
			if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
				ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
			break;

		case 1:
			/* inc bitmap */
			sm_set_bitmap(ic->bitmap, bit, 2);
			break;

		case 2:
			/* inc bitmap and insert into overflow */
			sm_set_bitmap(ic->bitmap, bit, 3);
			/* insert may allocate, so the bitmap lock must go. */
			reset_inc_context(ll, ic);

			le_rc = cpu_to_le32(3);
			__dm_bless_for_disk(&le_rc);
			r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
					    &b, &le_rc, &ll->ref_count_root);
			if (r < 0) {
				DMERR("ref count insert failed");
				return r;
			}
			break;

		default:
			/*
			 * inc within the overflow tree only.
			 */
			r = sm_ll_inc_overflow(ll, b, ic);
			if (r < 0)
				return r;
		}
	}

	*new_b = b;
	return 0;
}
3241b1d3 703
be500ed7
JT
/*
 * Finds a bitmap that contains entries in the block range, and increments
 * them.  Processes at most one bitmap block per call; *new_b reports
 * how far it got.
 */
static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
		       int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	struct inc_context ic;
	uint32_t bit, bit_end;
	dm_block_t index = b;

	init_inc_context(&ic);

	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ic.ie_disk);
	if (r < 0)
		return r;

	r = shadow_bitmap(ll, &ic);
	if (r)
		return r;

	/* Clamp the range to the end of this bitmap block. */
	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
	r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);

	exit_inc_context(ll, &ic);

	if (r)
		return r;

	return ll->save_ie(ll, index, &ic.ie_disk);
}
737
738int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
739 int32_t *nr_allocations)
740{
741 *nr_allocations = 0;
742 while (b != e) {
743 int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
744 if (r)
745 return r;
746 }
747
748 return 0;
749}
750
751/*----------------------------------------------------------------*/
752
/*
 * Removes block b's entry from the overflow btree.  Drops all context
 * locks first, since the removal may itself use the space map.
 */
static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
				struct inc_context *ic)
{
	reset_inc_context(ll, ic);
	return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
			       &b, &ll->ref_count_root);
}
760
/*
 * Decrements block b's overflow btree entry, returning the value before
 * the decrement in *old_rc.  A count of 3 drops back into the bitmap's
 * 2-bit range, so the btree entry is deleted instead (the caller then
 * rewrites the bitmap entry as 2).
 */
static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
				struct inc_context *ic, uint32_t *old_rc)
{
	int r;
	int index = -1;
	struct btree_node *n;
	__le32 *v_ptr;
	uint32_t rc;

	/* Fetching the leaf may allocate; release held locks first. */
	reset_inc_context(ll, ic);
	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
	if (r < 0)
		return r;

	n = dm_block_data(ic->overflow_leaf);

	if (!contains_key(n, b, index)) {
		DMERR("overflow btree is missing an entry");
		return -EINVAL;
	}

	v_ptr = value_ptr(n, index);
	rc = le32_to_cpu(*v_ptr);
	*old_rc = rc;

	if (rc == 3) {
		return __sm_ll_del_overflow(ll, b, ic);
	} else {
		rc--;
		*v_ptr = cpu_to_le32(rc);
		return 0;
	}
}
795
/*
 * Fast path for overflow decrements: if the cached leaf already covers
 * block b, update it directly; otherwise fall back to the full lookup.
 */
static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
			      struct inc_context *ic, uint32_t *old_rc)
{
	/*
	 * Do we already have the correct overflow leaf?
	 */
	if (ic->overflow_leaf) {
		int index;
		struct btree_node *n;
		__le32 *v_ptr;
		uint32_t rc;

		n = dm_block_data(ic->overflow_leaf);
		index = lower_bound(n, b);
		if (contains_key(n, b, index)) {
			v_ptr = value_ptr(n, index);
			rc = le32_to_cpu(*v_ptr);
			*old_rc = rc;

			if (rc > 3) {
				rc--;
				*v_ptr = cpu_to_le32(rc);
				return 0;
			} else {
				/* Count falls back into the bitmap range. */
				return __sm_ll_del_overflow(ll, b, ic);
			}

		}
	}

	return __sm_ll_dec_overflow(ll, b, ic, old_rc);
}
828
/*
 * Loops round decrementing entries in a single bitmap.
 * Returns via *new_b the first block NOT processed, so the caller can
 * continue the range in the next bitmap.
 */
static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
				   uint32_t bit, uint32_t bit_end,
				   struct inc_context *ic,
				   int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	uint32_t old;

	for (; bit != bit_end; bit++, b++) {
		/*
		 * We only need to drop the bitmap if we need to find a new btree
		 * leaf for the overflow. So if it was dropped last iteration,
		 * we now re-get it.
		 */
		r = ensure_bitmap(ll, ic);
		if (r)
			return r;

		old = sm_lookup_bitmap(ic->bitmap, bit);
		switch (old) {
		case 0:
			/* Decrementing a free block is a caller bug. */
			DMERR("unable to decrement block");
			return -EINVAL;

		case 1:
			/* dec bitmap */
			sm_set_bitmap(ic->bitmap, bit, 0);
			(*nr_allocations)--;
			ll->nr_allocated--;
			le32_add_cpu(&ic->ie_disk.nr_free, 1);
			ic->ie_disk.none_free_before =
				cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
			break;

		case 2:
			/* dec bitmap and insert into overflow */
			sm_set_bitmap(ic->bitmap, bit, 1);
			break;

		case 3:
			r = sm_ll_dec_overflow(ll, b, ic, &old);
			if (r < 0)
				return r;

			/* Overflow entry dropped to 3 and was deleted:
			 * the count now lives in the bitmap as 2. */
			if (old == 3) {
				r = ensure_bitmap(ll, ic);
				if (r)
					return r;

				sm_set_bitmap(ic->bitmap, bit, 2);
			}
			break;
		}
	}

	*new_b = b;
	return 0;
}
3241b1d3 890
be500ed7
JT
/*
 * Finds a bitmap that contains entries in the block range, and
 * decrements them.  Processes at most one bitmap block per call;
 * *new_b reports how far it got.
 */
static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
		       int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	uint32_t bit, bit_end;
	struct inc_context ic;
	dm_block_t index = b;

	init_inc_context(&ic);

	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ic.ie_disk);
	if (r < 0)
		return r;

	r = shadow_bitmap(ll, &ic);
	if (r)
		return r;

	/* Clamp the range to the end of this bitmap block. */
	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
	r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
	exit_inc_context(ll, &ic);

	if (r)
		return r;

	return ll->save_ie(ll, index, &ic.ie_disk);
}
919
be500ed7
JT
920int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
921 int32_t *nr_allocations)
922{
923 *nr_allocations = 0;
924 while (b != e) {
925 int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
926 if (r)
927 return r;
928 }
929
930 return 0;
931}
932
933/*----------------------------------------------------------------*/
934
3241b1d3
JT
/*
 * Commits the in-core index, but only if something actually changed
 * since the last commit (tracked via bitmap_index_changed).
 */
int sm_ll_commit(struct ll_disk *ll)
{
	int r = 0;

	if (ll->bitmap_index_changed) {
		r = ll->commit(ll);
		if (!r)
			ll->bitmap_index_changed = false;
	}

	return r;
}
947
948/*----------------------------------------------------------------*/
949
/* Reads index entry 'index' from the in-core copy of the index block. */
static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
			       struct disk_index_entry *ie)
{
	memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
	return 0;
}
956
/*
 * Writes index entry 'index' into the in-core index and marks it dirty
 * so sm_ll_commit() knows it must be flushed.
 */
static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
			       struct disk_index_entry *ie)
{
	ll->bitmap_index_changed = true;
	memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
	return 0;
}
964
/* Allocates a fresh on-disk index block and records its location. */
static int metadata_ll_init_index(struct ll_disk *ll)
{
	int r;
	struct dm_block *b;

	r = dm_tm_new_block(ll->tm, &index_validator, &b);
	if (r < 0)
		return r;

	ll->bitmap_root = dm_block_location(b);

	dm_tm_unlock(ll->tm, b);

	return 0;
}
980
/* Reads the on-disk index block into the in-core copy (ll->mi_le). */
static int metadata_ll_open(struct ll_disk *ll)
{
	int r;
	struct dm_block *block;

	r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
			    &index_validator, &block);
	if (r)
		return r;

	memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
	dm_tm_unlock(ll->tm, block);

	return 0;
}
996
/* The metadata index block holds a fixed number of bitmap entries. */
static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
{
	return MAX_METADATA_BITMAPS;
}
1001
/*
 * Writes the in-core index back to disk via a shadow, updating
 * bitmap_root to the (possibly relocated) copy.
 */
static int metadata_ll_commit(struct ll_disk *ll)
{
	int r, inc;
	struct dm_block *b;

	r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
	if (r)
		return r;

	memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
	ll->bitmap_root = dm_block_location(b);

	dm_tm_unlock(ll->tm, b);

	return 0;
}
1018
/*
 * Creates a brand new metadata space map: common init, metadata ops
 * wired in, fresh index block and empty overflow btree.
 */
int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
{
	int r;

	r = sm_ll_init(ll, tm);
	if (r < 0)
		return r;

	ll->load_ie = metadata_ll_load_ie;
	ll->save_ie = metadata_ll_save_ie;
	ll->init_index = metadata_ll_init_index;
	ll->open_index = metadata_ll_open;
	ll->max_entries = metadata_ll_max_entries;
	ll->commit = metadata_ll_commit;

	ll->nr_blocks = 0;
	ll->nr_allocated = 0;

	r = ll->init_index(ll);
	if (r < 0)
		return r;

	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
	if (r < 0)
		return r;

	return 0;
}
1047
/*
 * Opens an existing metadata space map from its serialised root.
 *
 * NOTE(review): -ENOMEM for a too-small root looks odd (-EINVAL would
 * be conventional) — confirm callers before changing it.
 */
int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct disk_sm_root smr;

	if (len < sizeof(struct disk_sm_root)) {
		DMERR("sm_metadata root too small");
		return -ENOMEM;
	}

	/*
	 * We don't know the alignment of the root_le buffer, so need to
	 * copy into a new structure.
	 */
	memcpy(&smr, root_le, sizeof(smr));

	r = sm_ll_init(ll, tm);
	if (r < 0)
		return r;

	ll->load_ie = metadata_ll_load_ie;
	ll->save_ie = metadata_ll_save_ie;
	ll->init_index = metadata_ll_init_index;
	ll->open_index = metadata_ll_open;
	ll->max_entries = metadata_ll_max_entries;
	ll->commit = metadata_ll_commit;

	ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
	ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
	ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
	ll->ref_count_root = le64_to_cpu(smr.ref_count_root);

	return ll->open_index(ll);
}
1083
1084/*----------------------------------------------------------------*/
1085
/* Disk flavour: index entries live in a btree rather than one block. */
static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
			   struct disk_index_entry *ie)
{
	return dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
}
1091
/* Persists an index entry into the bitmap btree, updating its root. */
static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
			   struct disk_index_entry *ie)
{
	__dm_bless_for_disk(ie);
	return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
			       &index, ie, &ll->bitmap_root);
}
1099
/* Creates an empty bitmap btree for a new disk space map. */
static int disk_ll_init_index(struct ll_disk *ll)
{
	return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
}
1104
/* The disk flavour keeps no in-core index, so opening is a no-op. */
static int disk_ll_open(struct ll_disk *ll)
{
	/* nothing to do */
	return 0;
}
1110
/* The btree-backed index is effectively unbounded. */
static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
{
	return -1ULL;
}
1115
/* Disk flavour has nothing extra to flush at commit time. */
static int disk_ll_commit(struct ll_disk *ll)
{
	return 0;
}
1120
/*
 * Creates a brand new disk space map: common init, disk ops wired in,
 * empty bitmap btree and empty overflow btree.
 */
int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
{
	int r;

	r = sm_ll_init(ll, tm);
	if (r < 0)
		return r;

	ll->load_ie = disk_ll_load_ie;
	ll->save_ie = disk_ll_save_ie;
	ll->init_index = disk_ll_init_index;
	ll->open_index = disk_ll_open;
	ll->max_entries = disk_ll_max_entries;
	ll->commit = disk_ll_commit;

	ll->nr_blocks = 0;
	ll->nr_allocated = 0;

	r = ll->init_index(ll);
	if (r < 0)
		return r;

	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
	if (r < 0)
		return r;

	return 0;
}
1149
1150int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
1151 void *root_le, size_t len)
1152{
1153 int r;
1154 struct disk_sm_root *smr = root_le;
1155
1156 if (len < sizeof(struct disk_sm_root)) {
1157 DMERR("sm_metadata root too small");
1158 return -ENOMEM;
1159 }
1160
1161 r = sm_ll_init(ll, tm);
1162 if (r < 0)
1163 return r;
1164
1165 ll->load_ie = disk_ll_load_ie;
1166 ll->save_ie = disk_ll_save_ie;
1167 ll->init_index = disk_ll_init_index;
1168 ll->open_index = disk_ll_open;
1169 ll->max_entries = disk_ll_max_entries;
1170 ll->commit = disk_ll_commit;
1171
1172 ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
1173 ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
1174 ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
1175 ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
1176
1177 return ll->open_index(ll);
1178}
1179
1180/*----------------------------------------------------------------*/