Commit | Line | Data |
---|---|---|
7431b783 NT |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* | |
3 | * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved. | |
4 | */ | |
5 | ||
6 | #include <linux/mm.h> | |
7 | #include <linux/err.h> | |
8 | #include <linux/slab.h> | |
9 | #include <linux/rwsem.h> | |
10 | #include <linux/bitops.h> | |
11 | #include <linux/bitmap.h> | |
12 | #include <linux/device-mapper.h> | |
13 | ||
14 | #include "persistent-data/dm-bitset.h" | |
15 | #include "persistent-data/dm-space-map.h" | |
16 | #include "persistent-data/dm-block-manager.h" | |
17 | #include "persistent-data/dm-transaction-manager.h" | |
18 | ||
19 | #include "dm-clone-metadata.h" | |
20 | ||
21 | #define DM_MSG_PREFIX "clone metadata" | |
22 | ||
23 | #define SUPERBLOCK_LOCATION 0 /* Block number passed to every superblock lock call in this file */ | |
24 | #define SUPERBLOCK_MAGIC 0x8af27f64 /* Value stored in superblock_disk.magic and verified by sb_check() */ | |
25 | #define SUPERBLOCK_CSUM_XOR 257649492 /* Passed to dm_bm_checksum() when checksumming the superblock */ | |
26 | ||
27 | #define DM_CLONE_MAX_CONCURRENT_LOCKS 5 /* Third argument of dm_block_manager_create() */ | |
28 | ||
29 | #define UUID_LEN 16 /* Size in bytes of superblock_disk.uuid */ | |
30 | ||
31 | /* Min and max dm-clone metadata versions accepted by sb_check(); the max is the version written out */ | |
32 | #define DM_CLONE_MIN_METADATA_VERSION 1 | |
33 | #define DM_CLONE_MAX_METADATA_VERSION 1 | |
34 | ||
35 | /* | |
36 | * On-disk metadata layout: the superblock, stored little-endian and packed. | |
37 | */ | |
38 | struct superblock_disk { | |
39 | __le32 csum; /* Checksum of the block starting at 'flags'; filled in by sb_prepare_for_write() */ | |
40 | __le32 flags; /* Written as 0 by __prepare_superblock() */ | |
41 | __le64 blocknr; /* Location of this block; set on write, verified by sb_check() */ | |
42 | ||
43 | __u8 uuid[UUID_LEN]; /* Zeroed on write; currently unused */ | |
44 | __le64 magic; /* SUPERBLOCK_MAGIC */ | |
45 | __le32 version; /* Metadata version; range-checked by sb_check() */ | |
46 | ||
47 | __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; /* Copy of the in-core space map root buffer */ | |
48 | ||
49 | __le64 region_size; /* Must match the region size given at open time */ | |
50 | __le64 target_size; /* Must match the target size given at open time */ | |
51 | ||
52 | __le64 bitset_root; /* Root of the on-disk region bitset */ | |
53 | } __packed; | |
54 | ||
55 | /* | |
56 | * Region and Dirty bitmaps. | |
57 | * | |
58 | * dm-clone logically splits the source and destination devices in regions of | |
59 | * fixed size. The destination device's regions are gradually hydrated, i.e., | |
60 | * we copy (clone) the source's regions to the destination device. Eventually, | |
61 | * all regions will get hydrated and all I/O will be served from the | |
62 | * destination device. | |
63 | * | |
64 | * We maintain an on-disk bitmap which tracks the state of each of the | |
65 | * destination device's regions, i.e., whether they are hydrated or not. | |
66 | * | |
67 | * To save constantly doing look ups on disk we keep an in core copy of the | |
68 | * on-disk bitmap, the region_map. | |
69 | * | |
70 | * To further reduce metadata I/O overhead we use a second bitmap, the dmap | |
71 | * (dirty bitmap), which tracks the dirty words, i.e. longs, of the region_map. | |
72 | * | |
73 | * When a region finishes hydrating dm-clone calls | |
74 | * dm_clone_set_region_hydrated(), or for discard requests | |
75 | * dm_clone_cond_set_range(), which sets the corresponding bits in region_map | |
76 | * and dmap. | |
77 | * | |
78 | * During a metadata commit we scan the dmap for dirty region_map words (longs) | |
79 | * and update accordingly the on-disk metadata. Thus, we don't have to flush to | |
80 | * disk the whole region_map. We can just flush the dirty region_map words. | |
81 | * | |
82 | * We use a dirty bitmap, which is smaller than the original region_map, to | |
83 | * reduce the amount of memory accesses during a metadata commit. As dm-bitset | |
84 | * accesses the on-disk bitmap in 64-bit word granularity, there is no | |
85 | * significant benefit in tracking the dirty region_map bits with a smaller | |
86 | * granularity. | |
87 | * | |
88 | * We could update directly the on-disk bitmap, when dm-clone calls either | |
89 | * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this | |
90 | * inserts significant metadata I/O overhead in dm-clone's I/O path. Also, as | |
91 | * these two functions don't block, we can call them in interrupt context, | |
92 | * e.g., in a hooked overwrite bio's completion routine, and further reduce the | |
93 | * I/O completion latency. | |
94 | * | |
95 | * We maintain two dirty bitmaps. During a metadata commit we atomically swap | |
96 | * the currently used dmap with the unused one. This allows the metadata update | |
97 | * functions to run concurrently with an ongoing commit. | |
98 | */ | |
99 | struct dirty_map { | |
100 | unsigned long *dirty_words; /* Bitmap with one bit per region_map word (long); a set bit marks that word dirty */ | |
101 | unsigned int changed; /* Set to 1 when a bit is marked; reset to 0 after this map has been flushed and committed */ | |
102 | }; | |
103 | ||
104 | struct dm_clone_metadata { | |
105 | /* The metadata block device */ | |
106 | struct block_device *bdev; | |
107 | ||
108 | sector_t target_size; /* As passed to dm_clone_metadata_open() */ | |
109 | sector_t region_size; /* As passed to dm_clone_metadata_open() */ | |
110 | unsigned long nr_regions; /* dm_sector_div_up(target_size, region_size) */ | |
111 | unsigned long nr_words; /* BITS_TO_LONGS(nr_regions): number of longs in region_map */ | |
112 | ||
113 | /* Spinlock protecting the region and dirty bitmaps. */ | |
114 | spinlock_t bitmap_lock; | |
115 | struct dirty_map dmap[2]; /* The two dirty maps that are swapped on every commit */ | |
116 | struct dirty_map *current_dmap; /* The dirty map the update functions currently record into */ | |
117 | ||
118 | /* | |
119 | * In core copy of the on-disk bitmap to save constantly doing look ups | |
120 | * on disk. | |
121 | */ | |
122 | unsigned long *region_map; | |
123 | ||
124 | /* Protected by bitmap_lock */ | |
125 | unsigned int read_only; | |
126 | ||
127 | struct dm_block_manager *bm; | |
128 | struct dm_space_map *sm; | |
129 | struct dm_transaction_manager *tm; | |
130 | ||
131 | struct rw_semaphore lock; /* Held for write by commit, abort, reload and the read-only/read-write setters; for read by the size getters */ | |
132 | ||
133 | struct dm_disk_bitset bitset_info; | |
134 | dm_block_t bitset_root; /* Current root of the on-disk bitset; saved in the superblock on commit */ | |
135 | ||
136 | /* | |
137 | * Reading the space map root can fail, so we read it into this | |
138 | * buffer before the superblock is locked and updated. | |
139 | */ | |
140 | __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; | |
141 | ||
142 | bool hydration_done:1; /* Set once region_map is found to be full, at open time or after a commit */ | |
143 | bool fail_io:1; /* Set when dm_clone_metadata_abort() fails to re-create the persistent data structures */ | |
144 | }; | |
145 | ||
146 | /*---------------------------------------------------------------------------*/ | |
147 | ||
148 | /* | |
149 | * Superblock validation. | |
150 | */ | |
151 | static void sb_prepare_for_write(struct dm_block_validator *v, | |
152 | struct dm_block *b, size_t sb_block_size) | |
153 | { | |
154 | struct superblock_disk *sb; | |
155 | u32 csum; | |
156 | ||
157 | sb = dm_block_data(b); | |
158 | sb->blocknr = cpu_to_le64(dm_block_location(b)); | |
159 | ||
160 | csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32), | |
161 | SUPERBLOCK_CSUM_XOR); | |
162 | sb->csum = cpu_to_le32(csum); | |
163 | } | |
164 | ||
165 | static int sb_check(struct dm_block_validator *v, struct dm_block *b, | |
166 | size_t sb_block_size) | |
167 | { | |
168 | struct superblock_disk *sb; | |
169 | u32 csum, metadata_version; | |
170 | ||
171 | sb = dm_block_data(b); | |
172 | ||
173 | if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) { | |
174 | DMERR("Superblock check failed: blocknr %llu, expected %llu", | |
175 | le64_to_cpu(sb->blocknr), | |
176 | (unsigned long long)dm_block_location(b)); | |
177 | return -ENOTBLK; | |
178 | } | |
179 | ||
180 | if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) { | |
181 | DMERR("Superblock check failed: magic %llu, expected %llu", | |
182 | le64_to_cpu(sb->magic), | |
183 | (unsigned long long)SUPERBLOCK_MAGIC); | |
184 | return -EILSEQ; | |
185 | } | |
186 | ||
187 | csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32), | |
188 | SUPERBLOCK_CSUM_XOR); | |
189 | if (sb->csum != cpu_to_le32(csum)) { | |
190 | DMERR("Superblock check failed: checksum %u, expected %u", | |
191 | csum, le32_to_cpu(sb->csum)); | |
192 | return -EILSEQ; | |
193 | } | |
194 | ||
195 | /* Check metadata version */ | |
196 | metadata_version = le32_to_cpu(sb->version); | |
197 | if (metadata_version < DM_CLONE_MIN_METADATA_VERSION || | |
198 | metadata_version > DM_CLONE_MAX_METADATA_VERSION) { | |
199 | DMERR("Clone metadata version %u found, but only versions between %u and %u supported.", | |
200 | metadata_version, DM_CLONE_MIN_METADATA_VERSION, | |
201 | DM_CLONE_MAX_METADATA_VERSION); | |
202 | return -EINVAL; | |
203 | } | |
204 | ||
205 | return 0; | |
206 | } | |
207 | ||
208 | static struct dm_block_validator sb_validator = { | |
209 | .name = "superblock", | |
210 | .prepare_for_write = sb_prepare_for_write, | |
211 | .check = sb_check | |
212 | }; | |
213 | ||
214 | /* | |
215 | * Check if the superblock is formatted or not. We consider the superblock to | |
216 | * be formatted in case we find non-zero bytes in it. | |
217 | */ | |
218 | static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted) | |
219 | { | |
220 | int r; | |
221 | unsigned int i, nr_words; | |
222 | struct dm_block *sblock; | |
223 | __le64 *data_le, zero = cpu_to_le64(0); | |
224 | ||
225 | /* | |
226 | * We don't use a validator here because the superblock could be all | |
227 | * zeroes. | |
228 | */ | |
229 | r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock); | |
230 | if (r) { | |
231 | DMERR("Failed to read_lock superblock"); | |
232 | return r; | |
233 | } | |
234 | ||
235 | data_le = dm_block_data(sblock); | |
236 | *formatted = false; | |
237 | ||
238 | /* This assumes that the block size is a multiple of 8 bytes */ | |
239 | BUG_ON(dm_bm_block_size(bm) % sizeof(__le64)); | |
240 | nr_words = dm_bm_block_size(bm) / sizeof(__le64); | |
241 | for (i = 0; i < nr_words; i++) { | |
242 | if (data_le[i] != zero) { | |
243 | *formatted = true; | |
244 | break; | |
245 | } | |
246 | } | |
247 | ||
248 | dm_bm_unlock(sblock); | |
249 | ||
250 | return 0; | |
251 | } | |
252 | ||
253 | /*---------------------------------------------------------------------------*/ | |
254 | ||
255 | /* | |
256 | * Low-level metadata handling. | |
257 | */ | |
258 | static inline int superblock_read_lock(struct dm_clone_metadata *cmd, | |
259 | struct dm_block **sblock) | |
260 | { | |
261 | return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock); | |
262 | } | |
263 | ||
264 | static inline int superblock_write_lock(struct dm_clone_metadata *cmd, | |
265 | struct dm_block **sblock) | |
266 | { | |
267 | return dm_bm_write_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock); | |
268 | } | |
269 | ||
270 | static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd, | |
271 | struct dm_block **sblock) | |
272 | { | |
273 | return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock); | |
274 | } | |
275 | ||
276 | static int __copy_sm_root(struct dm_clone_metadata *cmd) | |
277 | { | |
278 | int r; | |
279 | size_t root_size; | |
280 | ||
281 | r = dm_sm_root_size(cmd->sm, &root_size); | |
282 | if (r) | |
283 | return r; | |
284 | ||
285 | return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size); | |
286 | } | |
287 | ||
288 | /* Save dm-clone metadata in superblock */ | |
289 | static void __prepare_superblock(struct dm_clone_metadata *cmd, | |
290 | struct superblock_disk *sb) | |
291 | { | |
292 | sb->flags = cpu_to_le32(0UL); | |
293 | ||
294 | /* FIXME: UUID is currently unused */ | |
295 | memset(sb->uuid, 0, sizeof(sb->uuid)); | |
296 | ||
297 | sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC); | |
298 | sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION); | |
299 | ||
300 | /* Save the metadata space_map root */ | |
301 | memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root, | |
302 | sizeof(cmd->metadata_space_map_root)); | |
303 | ||
304 | sb->region_size = cpu_to_le64(cmd->region_size); | |
305 | sb->target_size = cpu_to_le64(cmd->target_size); | |
306 | sb->bitset_root = cpu_to_le64(cmd->bitset_root); | |
307 | } | |
308 | ||
309 | static int __open_metadata(struct dm_clone_metadata *cmd) | |
310 | { | |
311 | int r; | |
312 | struct dm_block *sblock; | |
313 | struct superblock_disk *sb; | |
314 | ||
315 | r = superblock_read_lock(cmd, &sblock); | |
316 | ||
317 | if (r) { | |
318 | DMERR("Failed to read_lock superblock"); | |
319 | return r; | |
320 | } | |
321 | ||
322 | sb = dm_block_data(sblock); | |
323 | ||
324 | /* Verify that target_size and region_size haven't changed. */ | |
325 | if (cmd->region_size != le64_to_cpu(sb->region_size) || | |
326 | cmd->target_size != le64_to_cpu(sb->target_size)) { | |
327 | DMERR("Region and/or target size don't match the ones in metadata"); | |
328 | r = -EINVAL; | |
329 | goto out_with_lock; | |
330 | } | |
331 | ||
332 | r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION, | |
333 | sb->metadata_space_map_root, | |
334 | sizeof(sb->metadata_space_map_root), | |
335 | &cmd->tm, &cmd->sm); | |
336 | ||
337 | if (r) { | |
338 | DMERR("dm_tm_open_with_sm failed"); | |
339 | goto out_with_lock; | |
340 | } | |
341 | ||
342 | dm_disk_bitset_init(cmd->tm, &cmd->bitset_info); | |
343 | cmd->bitset_root = le64_to_cpu(sb->bitset_root); | |
344 | ||
345 | out_with_lock: | |
346 | dm_bm_unlock(sblock); | |
347 | ||
348 | return r; | |
349 | } | |
350 | ||
351 | static int __format_metadata(struct dm_clone_metadata *cmd) | |
352 | { | |
353 | int r; | |
354 | struct dm_block *sblock; | |
355 | struct superblock_disk *sb; | |
356 | ||
357 | r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm); | |
358 | if (r) { | |
359 | DMERR("Failed to create transaction manager"); | |
360 | return r; | |
361 | } | |
362 | ||
363 | dm_disk_bitset_init(cmd->tm, &cmd->bitset_info); | |
364 | ||
365 | r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root); | |
366 | if (r) { | |
367 | DMERR("Failed to create empty on-disk bitset"); | |
368 | goto err_with_tm; | |
369 | } | |
370 | ||
371 | r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0, | |
372 | cmd->nr_regions, false, &cmd->bitset_root); | |
373 | if (r) { | |
374 | DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions); | |
375 | goto err_with_tm; | |
376 | } | |
377 | ||
378 | /* Flush to disk all blocks, except the superblock */ | |
379 | r = dm_tm_pre_commit(cmd->tm); | |
380 | if (r) { | |
381 | DMERR("dm_tm_pre_commit failed"); | |
382 | goto err_with_tm; | |
383 | } | |
384 | ||
385 | r = __copy_sm_root(cmd); | |
386 | if (r) { | |
387 | DMERR("__copy_sm_root failed"); | |
388 | goto err_with_tm; | |
389 | } | |
390 | ||
391 | r = superblock_write_lock_zero(cmd, &sblock); | |
392 | if (r) { | |
393 | DMERR("Failed to write_lock superblock"); | |
394 | goto err_with_tm; | |
395 | } | |
396 | ||
397 | sb = dm_block_data(sblock); | |
398 | __prepare_superblock(cmd, sb); | |
399 | r = dm_tm_commit(cmd->tm, sblock); | |
400 | if (r) { | |
401 | DMERR("Failed to commit superblock"); | |
402 | goto err_with_tm; | |
403 | } | |
404 | ||
405 | return 0; | |
406 | ||
407 | err_with_tm: | |
408 | dm_sm_destroy(cmd->sm); | |
409 | dm_tm_destroy(cmd->tm); | |
410 | ||
411 | return r; | |
412 | } | |
413 | ||
414 | static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device) | |
415 | { | |
416 | int r; | |
417 | bool formatted = false; | |
418 | ||
419 | r = __superblock_all_zeroes(cmd->bm, &formatted); | |
420 | if (r) | |
421 | return r; | |
422 | ||
423 | if (!formatted) | |
424 | return may_format_device ? __format_metadata(cmd) : -EPERM; | |
425 | ||
426 | return __open_metadata(cmd); | |
427 | } | |
428 | ||
429 | static int __create_persistent_data_structures(struct dm_clone_metadata *cmd, | |
430 | bool may_format_device) | |
431 | { | |
432 | int r; | |
433 | ||
434 | /* Create block manager */ | |
435 | cmd->bm = dm_block_manager_create(cmd->bdev, | |
436 | DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, | |
437 | DM_CLONE_MAX_CONCURRENT_LOCKS); | |
438 | if (IS_ERR(cmd->bm)) { | |
439 | DMERR("Failed to create block manager"); | |
440 | return PTR_ERR(cmd->bm); | |
441 | } | |
442 | ||
443 | r = __open_or_format_metadata(cmd, may_format_device); | |
444 | if (r) | |
445 | dm_block_manager_destroy(cmd->bm); | |
446 | ||
447 | return r; | |
448 | } | |
449 | ||
450 | static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd) | |
451 | { | |
452 | dm_sm_destroy(cmd->sm); | |
453 | dm_tm_destroy(cmd->tm); | |
454 | dm_block_manager_destroy(cmd->bm); | |
455 | } | |
456 | ||
457 | /*---------------------------------------------------------------------------*/ | |
458 | ||
/* Number of bytes needed for a bitmap of @nr_bits bits, rounded up to whole longs. */
static size_t bitmap_size(unsigned long nr_bits)
{
	const size_t nr_longs = BITS_TO_LONGS(nr_bits);

	return nr_longs * sizeof(long);
}
463 | ||
464 | static int dirty_map_init(struct dm_clone_metadata *cmd) | |
465 | { | |
466 | cmd->dmap[0].changed = 0; | |
467 | cmd->dmap[0].dirty_words = kvzalloc(bitmap_size(cmd->nr_words), GFP_KERNEL); | |
468 | ||
469 | if (!cmd->dmap[0].dirty_words) { | |
470 | DMERR("Failed to allocate dirty bitmap"); | |
471 | return -ENOMEM; | |
472 | } | |
473 | ||
474 | cmd->dmap[1].changed = 0; | |
475 | cmd->dmap[1].dirty_words = kvzalloc(bitmap_size(cmd->nr_words), GFP_KERNEL); | |
476 | ||
477 | if (!cmd->dmap[1].dirty_words) { | |
478 | DMERR("Failed to allocate dirty bitmap"); | |
479 | kvfree(cmd->dmap[0].dirty_words); | |
480 | return -ENOMEM; | |
481 | } | |
482 | ||
483 | cmd->current_dmap = &cmd->dmap[0]; | |
484 | ||
485 | return 0; | |
486 | } | |
487 | ||
488 | static void dirty_map_exit(struct dm_clone_metadata *cmd) | |
489 | { | |
490 | kvfree(cmd->dmap[0].dirty_words); | |
491 | kvfree(cmd->dmap[1].dirty_words); | |
492 | } | |
493 | ||
494 | static int __load_bitset_in_core(struct dm_clone_metadata *cmd) | |
495 | { | |
496 | int r; | |
497 | unsigned long i; | |
498 | struct dm_bitset_cursor c; | |
499 | ||
500 | /* Flush bitset cache */ | |
501 | r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root); | |
502 | if (r) | |
503 | return r; | |
504 | ||
505 | r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c); | |
506 | if (r) | |
507 | return r; | |
508 | ||
509 | for (i = 0; ; i++) { | |
510 | if (dm_bitset_cursor_get_value(&c)) | |
511 | __set_bit(i, cmd->region_map); | |
512 | else | |
513 | __clear_bit(i, cmd->region_map); | |
514 | ||
515 | if (i >= (cmd->nr_regions - 1)) | |
516 | break; | |
517 | ||
518 | r = dm_bitset_cursor_next(&c); | |
519 | ||
520 | if (r) | |
521 | break; | |
522 | } | |
523 | ||
524 | dm_bitset_cursor_end(&c); | |
525 | ||
526 | return r; | |
527 | } | |
528 | ||
529 | struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev, | |
530 | sector_t target_size, | |
531 | sector_t region_size) | |
532 | { | |
533 | int r; | |
534 | struct dm_clone_metadata *cmd; | |
535 | ||
536 | cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); | |
537 | if (!cmd) { | |
538 | DMERR("Failed to allocate memory for dm-clone metadata"); | |
539 | return ERR_PTR(-ENOMEM); | |
540 | } | |
541 | ||
542 | cmd->bdev = bdev; | |
543 | cmd->target_size = target_size; | |
544 | cmd->region_size = region_size; | |
545 | cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size); | |
546 | cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions); | |
547 | ||
548 | init_rwsem(&cmd->lock); | |
549 | spin_lock_init(&cmd->bitmap_lock); | |
550 | cmd->read_only = 0; | |
551 | cmd->fail_io = false; | |
552 | cmd->hydration_done = false; | |
553 | ||
554 | cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL); | |
555 | if (!cmd->region_map) { | |
556 | DMERR("Failed to allocate memory for region bitmap"); | |
557 | r = -ENOMEM; | |
558 | goto out_with_md; | |
559 | } | |
560 | ||
561 | r = __create_persistent_data_structures(cmd, true); | |
562 | if (r) | |
563 | goto out_with_region_map; | |
564 | ||
565 | r = __load_bitset_in_core(cmd); | |
566 | if (r) { | |
567 | DMERR("Failed to load on-disk region map"); | |
568 | goto out_with_pds; | |
569 | } | |
570 | ||
571 | r = dirty_map_init(cmd); | |
572 | if (r) | |
573 | goto out_with_pds; | |
574 | ||
575 | if (bitmap_full(cmd->region_map, cmd->nr_regions)) | |
576 | cmd->hydration_done = true; | |
577 | ||
578 | return cmd; | |
579 | ||
580 | out_with_pds: | |
581 | __destroy_persistent_data_structures(cmd); | |
582 | ||
583 | out_with_region_map: | |
584 | kvfree(cmd->region_map); | |
585 | ||
586 | out_with_md: | |
587 | kfree(cmd); | |
588 | ||
589 | return ERR_PTR(r); | |
590 | } | |
591 | ||
592 | void dm_clone_metadata_close(struct dm_clone_metadata *cmd) | |
593 | { | |
594 | if (!cmd->fail_io) | |
595 | __destroy_persistent_data_structures(cmd); | |
596 | ||
597 | dirty_map_exit(cmd); | |
598 | kvfree(cmd->region_map); | |
599 | kfree(cmd); | |
600 | } | |
601 | ||
602 | bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd) | |
603 | { | |
604 | return cmd->hydration_done; | |
605 | } | |
606 | ||
607 | bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr) | |
608 | { | |
609 | return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map); | |
610 | } | |
611 | ||
612 | bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd, | |
613 | unsigned long start, unsigned long nr_regions) | |
614 | { | |
615 | unsigned long bit; | |
616 | ||
617 | if (dm_clone_is_hydration_done(cmd)) | |
618 | return true; | |
619 | ||
620 | bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start); | |
621 | ||
622 | return (bit >= (start + nr_regions)); | |
623 | } | |
624 | ||
625 | unsigned long dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd) | |
626 | { | |
627 | return bitmap_weight(cmd->region_map, cmd->nr_regions); | |
628 | } | |
629 | ||
630 | unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd, | |
631 | unsigned long start) | |
632 | { | |
633 | return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start); | |
634 | } | |
635 | ||
636 | static int __update_metadata_word(struct dm_clone_metadata *cmd, unsigned long word) | |
637 | { | |
638 | int r; | |
639 | unsigned long index = word * BITS_PER_LONG; | |
640 | unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG); | |
641 | ||
642 | while (index < max_index) { | |
643 | if (test_bit(index, cmd->region_map)) { | |
644 | r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root, | |
645 | index, &cmd->bitset_root); | |
646 | ||
647 | if (r) { | |
648 | DMERR("dm_bitset_set_bit failed"); | |
649 | return r; | |
650 | } | |
651 | } | |
652 | index++; | |
653 | } | |
654 | ||
655 | return 0; | |
656 | } | |
657 | ||
658 | static int __metadata_commit(struct dm_clone_metadata *cmd) | |
659 | { | |
660 | int r; | |
661 | struct dm_block *sblock; | |
662 | struct superblock_disk *sb; | |
663 | ||
664 | /* Flush bitset cache */ | |
665 | r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root); | |
666 | if (r) { | |
667 | DMERR("dm_bitset_flush failed"); | |
668 | return r; | |
669 | } | |
670 | ||
671 | /* Flush to disk all blocks, except the superblock */ | |
672 | r = dm_tm_pre_commit(cmd->tm); | |
673 | if (r) { | |
674 | DMERR("dm_tm_pre_commit failed"); | |
675 | return r; | |
676 | } | |
677 | ||
678 | /* Save the space map root in cmd->metadata_space_map_root */ | |
679 | r = __copy_sm_root(cmd); | |
680 | if (r) { | |
681 | DMERR("__copy_sm_root failed"); | |
682 | return r; | |
683 | } | |
684 | ||
685 | /* Lock the superblock */ | |
686 | r = superblock_write_lock_zero(cmd, &sblock); | |
687 | if (r) { | |
688 | DMERR("Failed to write_lock superblock"); | |
689 | return r; | |
690 | } | |
691 | ||
692 | /* Save the metadata in superblock */ | |
693 | sb = dm_block_data(sblock); | |
694 | __prepare_superblock(cmd, sb); | |
695 | ||
696 | /* Unlock superblock and commit it to disk */ | |
697 | r = dm_tm_commit(cmd->tm, sblock); | |
698 | if (r) { | |
699 | DMERR("Failed to commit superblock"); | |
700 | return r; | |
701 | } | |
702 | ||
703 | /* | |
704 | * FIXME: Find a more efficient way to check if the hydration is done. | |
705 | */ | |
706 | if (bitmap_full(cmd->region_map, cmd->nr_regions)) | |
707 | cmd->hydration_done = true; | |
708 | ||
709 | return 0; | |
710 | } | |
711 | ||
712 | static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap) | |
713 | { | |
714 | int r; | |
715 | unsigned long word, flags; | |
716 | ||
717 | word = 0; | |
718 | do { | |
719 | word = find_next_bit(dmap->dirty_words, cmd->nr_words, word); | |
720 | ||
721 | if (word == cmd->nr_words) | |
722 | break; | |
723 | ||
724 | r = __update_metadata_word(cmd, word); | |
725 | ||
726 | if (r) | |
727 | return r; | |
728 | ||
729 | __clear_bit(word, dmap->dirty_words); | |
730 | word++; | |
731 | } while (word < cmd->nr_words); | |
732 | ||
733 | r = __metadata_commit(cmd); | |
734 | ||
735 | if (r) | |
736 | return r; | |
737 | ||
738 | /* Update the changed flag */ | |
739 | spin_lock_irqsave(&cmd->bitmap_lock, flags); | |
740 | dmap->changed = 0; | |
741 | spin_unlock_irqrestore(&cmd->bitmap_lock, flags); | |
742 | ||
743 | return 0; | |
744 | } | |
745 | ||
746 | int dm_clone_metadata_commit(struct dm_clone_metadata *cmd) | |
747 | { | |
748 | int r = -EPERM; | |
749 | unsigned long flags; | |
750 | struct dirty_map *dmap, *next_dmap; | |
751 | ||
752 | down_write(&cmd->lock); | |
753 | ||
754 | if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) | |
755 | goto out; | |
756 | ||
757 | /* Get current dirty bitmap */ | |
758 | dmap = cmd->current_dmap; | |
759 | ||
760 | /* Get next dirty bitmap */ | |
761 | next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0]; | |
762 | ||
763 | /* | |
764 | * The last commit failed, so we don't have a clean dirty-bitmap to | |
765 | * use. | |
766 | */ | |
767 | if (WARN_ON(next_dmap->changed)) { | |
768 | r = -EINVAL; | |
769 | goto out; | |
770 | } | |
771 | ||
772 | /* Swap dirty bitmaps */ | |
773 | spin_lock_irqsave(&cmd->bitmap_lock, flags); | |
774 | cmd->current_dmap = next_dmap; | |
775 | spin_unlock_irqrestore(&cmd->bitmap_lock, flags); | |
776 | ||
777 | /* | |
778 | * No one is accessing the old dirty bitmap anymore, so we can flush | |
779 | * it. | |
780 | */ | |
781 | r = __flush_dmap(cmd, dmap); | |
782 | out: | |
783 | up_write(&cmd->lock); | |
784 | ||
785 | return r; | |
786 | } | |
787 | ||
788 | int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr) | |
789 | { | |
790 | int r = 0; | |
791 | struct dirty_map *dmap; | |
792 | unsigned long word, flags; | |
793 | ||
794 | word = region_nr / BITS_PER_LONG; | |
795 | ||
796 | spin_lock_irqsave(&cmd->bitmap_lock, flags); | |
797 | ||
798 | if (cmd->read_only) { | |
799 | r = -EPERM; | |
800 | goto out; | |
801 | } | |
802 | ||
803 | dmap = cmd->current_dmap; | |
804 | ||
805 | __set_bit(word, dmap->dirty_words); | |
806 | __set_bit(region_nr, cmd->region_map); | |
807 | dmap->changed = 1; | |
808 | ||
809 | out: | |
810 | spin_unlock_irqrestore(&cmd->bitmap_lock, flags); | |
811 | ||
812 | return r; | |
813 | } | |
814 | ||
815 | int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start, | |
816 | unsigned long nr_regions) | |
817 | { | |
818 | int r = 0; | |
819 | struct dirty_map *dmap; | |
820 | unsigned long word, region_nr, flags; | |
821 | ||
822 | spin_lock_irqsave(&cmd->bitmap_lock, flags); | |
823 | ||
824 | if (cmd->read_only) { | |
825 | r = -EPERM; | |
826 | goto out; | |
827 | } | |
828 | ||
829 | dmap = cmd->current_dmap; | |
830 | for (region_nr = start; region_nr < (start + nr_regions); region_nr++) { | |
831 | if (!test_bit(region_nr, cmd->region_map)) { | |
832 | word = region_nr / BITS_PER_LONG; | |
833 | __set_bit(word, dmap->dirty_words); | |
834 | __set_bit(region_nr, cmd->region_map); | |
835 | dmap->changed = 1; | |
836 | } | |
837 | } | |
838 | out: | |
839 | spin_unlock_irqrestore(&cmd->bitmap_lock, flags); | |
840 | ||
841 | return r; | |
842 | } | |
843 | ||
844 | /* | |
845 | * WARNING: This must not be called concurrently with either | |
846 | * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes | |
847 | * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only | |
848 | * exception is after setting the metadata to read-only mode, using | |
849 | * dm_clone_metadata_set_read_only(). | |
850 | * | |
851 | * We don't take the spinlock because __load_bitset_in_core() does I/O, so it | |
852 | * may block. | |
853 | */ | |
854 | int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd) | |
855 | { | |
856 | int r = -EINVAL; | |
857 | ||
858 | down_write(&cmd->lock); | |
859 | ||
860 | if (cmd->fail_io) | |
861 | goto out; | |
862 | ||
863 | r = __load_bitset_in_core(cmd); | |
864 | out: | |
865 | up_write(&cmd->lock); | |
866 | ||
867 | return r; | |
868 | } | |
869 | ||
870 | bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd) | |
871 | { | |
872 | bool r; | |
873 | unsigned long flags; | |
874 | ||
875 | spin_lock_irqsave(&cmd->bitmap_lock, flags); | |
876 | r = cmd->dmap[0].changed || cmd->dmap[1].changed; | |
877 | spin_unlock_irqrestore(&cmd->bitmap_lock, flags); | |
878 | ||
879 | return r; | |
880 | } | |
881 | ||
882 | int dm_clone_metadata_abort(struct dm_clone_metadata *cmd) | |
883 | { | |
884 | int r = -EPERM; | |
885 | ||
886 | down_write(&cmd->lock); | |
887 | ||
888 | if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) | |
889 | goto out; | |
890 | ||
891 | __destroy_persistent_data_structures(cmd); | |
892 | ||
893 | r = __create_persistent_data_structures(cmd, false); | |
894 | if (r) { | |
895 | /* If something went wrong we can neither write nor read the metadata */ | |
896 | cmd->fail_io = true; | |
897 | } | |
898 | out: | |
899 | up_write(&cmd->lock); | |
900 | ||
901 | return r; | |
902 | } | |
903 | ||
904 | void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd) | |
905 | { | |
906 | unsigned long flags; | |
907 | ||
908 | down_write(&cmd->lock); | |
909 | ||
910 | spin_lock_irqsave(&cmd->bitmap_lock, flags); | |
911 | cmd->read_only = 1; | |
912 | spin_unlock_irqrestore(&cmd->bitmap_lock, flags); | |
913 | ||
914 | if (!cmd->fail_io) | |
915 | dm_bm_set_read_only(cmd->bm); | |
916 | ||
917 | up_write(&cmd->lock); | |
918 | } | |
919 | ||
920 | void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd) | |
921 | { | |
922 | unsigned long flags; | |
923 | ||
924 | down_write(&cmd->lock); | |
925 | ||
926 | spin_lock_irqsave(&cmd->bitmap_lock, flags); | |
927 | cmd->read_only = 0; | |
928 | spin_unlock_irqrestore(&cmd->bitmap_lock, flags); | |
929 | ||
930 | if (!cmd->fail_io) | |
931 | dm_bm_set_read_write(cmd->bm); | |
932 | ||
933 | up_write(&cmd->lock); | |
934 | } | |
935 | ||
936 | int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd, | |
937 | dm_block_t *result) | |
938 | { | |
939 | int r = -EINVAL; | |
940 | ||
941 | down_read(&cmd->lock); | |
942 | ||
943 | if (!cmd->fail_io) | |
944 | r = dm_sm_get_nr_free(cmd->sm, result); | |
945 | ||
946 | up_read(&cmd->lock); | |
947 | ||
948 | return r; | |
949 | } | |
950 | ||
951 | int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd, | |
952 | dm_block_t *result) | |
953 | { | |
954 | int r = -EINVAL; | |
955 | ||
956 | down_read(&cmd->lock); | |
957 | ||
958 | if (!cmd->fail_io) | |
959 | r = dm_sm_get_nr_blocks(cmd->sm, result); | |
960 | ||
961 | up_read(&cmd->lock); | |
962 | ||
963 | return r; | |
964 | } |