1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright 2023 Red Hat
8 #include <linux/log2.h>
11 #include "memory-alloc.h"
12 #include "permassert.h"
14 #include "constants.h"
15 #include "status-codes.h"
18 struct geometry_block {
19 char magic_number[VDO_GEOMETRY_MAGIC_NUMBER_SIZE];
20 struct packed_header header;
24 static const struct header GEOMETRY_BLOCK_HEADER_5_0 = {
25 .id = VDO_GEOMETRY_BLOCK,
31 * Note: this size isn't just the payload size following the header, like it is everywhere
34 .size = sizeof(struct geometry_block) + sizeof(struct volume_geometry),
37 static const struct header GEOMETRY_BLOCK_HEADER_4_0 = {
38 .id = VDO_GEOMETRY_BLOCK,
44 * Note: this size isn't just the payload size following the header, like it is everywhere
47 .size = sizeof(struct geometry_block) + sizeof(struct volume_geometry_4_0),
50 const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1] = "dmvdo001";
53 PAGE_HEADER_4_1_SIZE = 8 + 8 + 8 + 1 + 1 + 1 + 1,
56 static const struct version_number BLOCK_MAP_4_1 = {
61 const struct header VDO_BLOCK_MAP_HEADER_2_0 = {
67 .size = sizeof(struct block_map_state_2_0),
70 const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0 = {
71 .id = VDO_RECOVERY_JOURNAL,
76 .size = sizeof(struct recovery_journal_state_7_0),
79 const struct header VDO_SLAB_DEPOT_HEADER_2_0 = {
85 .size = sizeof(struct slab_depot_state_2_0),
88 static const struct header VDO_LAYOUT_HEADER_3_0 = {
94 .size = sizeof(struct layout_3_0) + (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT),
97 static const enum partition_id REQUIRED_PARTITIONS[] = {
98 VDO_BLOCK_MAP_PARTITION,
99 VDO_SLAB_DEPOT_PARTITION,
100 VDO_RECOVERY_JOURNAL_PARTITION,
101 VDO_SLAB_SUMMARY_PARTITION,
105 * The current version for the data encoded in the super block. This must be changed any time there
106 * is a change to encoding of the component data of any VDO component.
108 static const struct version_number VDO_COMPONENT_DATA_41_0 = {
113 const struct version_number VDO_VOLUME_VERSION_67_0 = {
118 static const struct header SUPER_BLOCK_HEADER_12_0 = {
119 .id = VDO_SUPER_BLOCK,
125 /* This is the minimum size, if the super block contains no components. */
126 .size = VDO_SUPER_BLOCK_FIXED_SIZE - VDO_ENCODED_HEADER_SIZE,
130 * validate_version() - Check whether a version matches an expected version.
131 * @expected_version: The expected version.
132 * @actual_version: The version being validated.
133 * @component_name: The name of the component or the calling function (for error logging).
135 * Logs an error describing a mismatch.
137 * Return: VDO_SUCCESS if the versions are the same,
138 * VDO_UNSUPPORTED_VERSION if the versions don't match.
140 static int __must_check validate_version(struct version_number expected_version,
141 struct version_number actual_version,
142 const char *component_name)
144 if (!vdo_are_same_version(expected_version, actual_version)) {
145 return uds_log_error_strerror(VDO_UNSUPPORTED_VERSION,
146 "%s version mismatch, expected %d.%d, got %d.%d",
148 expected_version.major_version,
149 expected_version.minor_version,
150 actual_version.major_version,
151 actual_version.minor_version);
158 * vdo_validate_header() - Check whether a header matches expectations.
159 * @expected_header: The expected header.
160 * @actual_header: The header being validated.
161 * @exact_size: If true, the size fields of the two headers must be the same, otherwise it is
162 * required that actual_header.size >= expected_header.size.
163 * @name: The name of the component or the calling function (for error logging).
165 * Logs an error describing the first mismatch found.
167 * Return: VDO_SUCCESS if the header meets expectations,
168 * VDO_INCORRECT_COMPONENT if the component ids don't match,
169 * VDO_UNSUPPORTED_VERSION if the versions or sizes don't match.
171 int vdo_validate_header(const struct header *expected_header,
172 const struct header *actual_header, bool exact_size,
177 if (expected_header->id != actual_header->id) {
178 return uds_log_error_strerror(VDO_INCORRECT_COMPONENT,
179 "%s ID mismatch, expected %d, got %d",
180 name, expected_header->id,
184 result = validate_version(expected_header->version, actual_header->version,
186 if (result != VDO_SUCCESS)
189 if ((expected_header->size > actual_header->size) ||
190 (exact_size && (expected_header->size < actual_header->size))) {
191 return uds_log_error_strerror(VDO_UNSUPPORTED_VERSION,
192 "%s size mismatch, expected %zu, got %zu",
193 name, expected_header->size,
194 actual_header->size);
200 static void encode_version_number(u8 *buffer, size_t *offset,
201 struct version_number version)
203 struct packed_version_number packed = vdo_pack_version_number(version);
205 memcpy(buffer + *offset, &packed, sizeof(packed));
206 *offset += sizeof(packed);
209 void vdo_encode_header(u8 *buffer, size_t *offset, const struct header *header)
211 struct packed_header packed = vdo_pack_header(header);
213 memcpy(buffer + *offset, &packed, sizeof(packed));
214 *offset += sizeof(packed);
217 static void decode_version_number(u8 *buffer, size_t *offset,
218 struct version_number *version)
220 struct packed_version_number packed;
222 memcpy(&packed, buffer + *offset, sizeof(packed));
223 *offset += sizeof(packed);
224 *version = vdo_unpack_version_number(packed);
227 void vdo_decode_header(u8 *buffer, size_t *offset, struct header *header)
229 struct packed_header packed;
231 memcpy(&packed, buffer + *offset, sizeof(packed));
232 *offset += sizeof(packed);
234 *header = vdo_unpack_header(&packed);
238 * decode_volume_geometry() - Decode the on-disk representation of a volume geometry from a buffer.
239 * @buffer: A buffer to decode from.
240 * @offset: The offset in the buffer at which to decode.
241 * @geometry: The structure to receive the decoded fields.
242 * @version: The geometry block version to decode.
244 static void decode_volume_geometry(u8 *buffer, size_t *offset,
245 struct volume_geometry *geometry, u32 version)
248 enum volume_region_id id;
250 block_count_t bio_offset = 0;
253 /* This is for backwards compatibility. */
254 decode_u32_le(buffer, offset, &unused);
255 geometry->unused = unused;
257 decode_u64_le(buffer, offset, &nonce);
258 geometry->nonce = nonce;
260 memcpy((unsigned char *) &geometry->uuid, buffer + *offset, sizeof(uuid_t));
261 *offset += sizeof(uuid_t);
264 decode_u64_le(buffer, offset, &bio_offset);
265 geometry->bio_offset = bio_offset;
267 for (id = 0; id < VDO_VOLUME_REGION_COUNT; id++) {
268 physical_block_number_t start_block;
269 enum volume_region_id saved_id;
271 decode_u32_le(buffer, offset, &saved_id);
272 decode_u64_le(buffer, offset, &start_block);
274 geometry->regions[id] = (struct volume_region) {
276 .start_block = start_block,
280 decode_u32_le(buffer, offset, &mem);
281 *offset += sizeof(u32);
282 sparse = buffer[(*offset)++];
284 geometry->index_config = (struct index_config) {
291 * vdo_parse_geometry_block() - Decode and validate an encoded geometry block.
292 * @block: The encoded geometry block.
293 * @geometry: The structure to receive the decoded fields.
295 int __must_check vdo_parse_geometry_block(u8 *block, struct volume_geometry *geometry)
297 u32 checksum, saved_checksum;
298 struct header header;
302 if (memcmp(block, VDO_GEOMETRY_MAGIC_NUMBER, VDO_GEOMETRY_MAGIC_NUMBER_SIZE) != 0)
303 return VDO_BAD_MAGIC;
304 offset += VDO_GEOMETRY_MAGIC_NUMBER_SIZE;
306 vdo_decode_header(block, &offset, &header);
307 if (header.version.major_version <= 4) {
308 result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_4_0, &header,
311 result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_5_0, &header,
314 if (result != VDO_SUCCESS)
317 decode_volume_geometry(block, &offset, geometry, header.version.major_version);
319 result = ASSERT(header.size == offset + sizeof(u32),
320 "should have decoded up to the geometry checksum");
321 if (result != VDO_SUCCESS)
324 /* Decode and verify the checksum. */
325 checksum = vdo_crc32(block, offset);
326 decode_u32_le(block, &offset, &saved_checksum);
328 return ((checksum == saved_checksum) ? VDO_SUCCESS : VDO_CHECKSUM_MISMATCH);
331 struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce,
332 physical_block_number_t pbn,
335 struct block_map_page *page = buffer;
337 memset(buffer, 0, VDO_BLOCK_SIZE);
338 page->version = vdo_pack_version_number(BLOCK_MAP_4_1);
339 page->header.nonce = __cpu_to_le64(nonce);
340 page->header.pbn = __cpu_to_le64(pbn);
341 page->header.initialized = initialized;
345 enum block_map_page_validity vdo_validate_block_map_page(struct block_map_page *page,
347 physical_block_number_t pbn)
349 BUILD_BUG_ON(sizeof(struct block_map_page_header) != PAGE_HEADER_4_1_SIZE);
351 if (!vdo_are_same_version(BLOCK_MAP_4_1,
352 vdo_unpack_version_number(page->version)) ||
353 !page->header.initialized || (nonce != __le64_to_cpu(page->header.nonce)))
354 return VDO_BLOCK_MAP_PAGE_INVALID;
356 if (pbn != vdo_get_block_map_page_pbn(page))
357 return VDO_BLOCK_MAP_PAGE_BAD;
359 return VDO_BLOCK_MAP_PAGE_VALID;
362 static int decode_block_map_state_2_0(u8 *buffer, size_t *offset,
363 struct block_map_state_2_0 *state)
365 size_t initial_offset;
366 block_count_t flat_page_count, root_count;
367 physical_block_number_t flat_page_origin, root_origin;
368 struct header header;
371 vdo_decode_header(buffer, offset, &header);
372 result = vdo_validate_header(&VDO_BLOCK_MAP_HEADER_2_0, &header, true, __func__);
373 if (result != VDO_SUCCESS)
376 initial_offset = *offset;
378 decode_u64_le(buffer, offset, &flat_page_origin);
379 result = ASSERT(flat_page_origin == VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
380 "Flat page origin must be %u (recorded as %llu)",
381 VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
382 (unsigned long long) state->flat_page_origin);
383 if (result != UDS_SUCCESS)
386 decode_u64_le(buffer, offset, &flat_page_count);
387 result = ASSERT(flat_page_count == 0,
388 "Flat page count must be 0 (recorded as %llu)",
389 (unsigned long long) state->flat_page_count);
390 if (result != UDS_SUCCESS)
393 decode_u64_le(buffer, offset, &root_origin);
394 decode_u64_le(buffer, offset, &root_count);
396 result = ASSERT(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
397 "decoded block map component size must match header size");
398 if (result != VDO_SUCCESS)
401 *state = (struct block_map_state_2_0) {
402 .flat_page_origin = flat_page_origin,
403 .flat_page_count = flat_page_count,
404 .root_origin = root_origin,
405 .root_count = root_count,
411 static void encode_block_map_state_2_0(u8 *buffer, size_t *offset,
412 struct block_map_state_2_0 state)
414 size_t initial_offset;
416 vdo_encode_header(buffer, offset, &VDO_BLOCK_MAP_HEADER_2_0);
418 initial_offset = *offset;
419 encode_u64_le(buffer, offset, state.flat_page_origin);
420 encode_u64_le(buffer, offset, state.flat_page_count);
421 encode_u64_le(buffer, offset, state.root_origin);
422 encode_u64_le(buffer, offset, state.root_count);
424 ASSERT_LOG_ONLY(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
425 "encoded block map component size must match header size");
429 * vdo_compute_new_forest_pages() - Compute the number of pages which must be allocated at each
430 * level in order to grow the forest to a new number of entries.
431 * @entries: The new number of entries the block map must address.
433 * Return: The total number of non-leaf pages required.
435 block_count_t vdo_compute_new_forest_pages(root_count_t root_count,
436 struct boundary *old_sizes,
437 block_count_t entries,
438 struct boundary *new_sizes)
440 page_count_t leaf_pages = max(vdo_compute_block_map_page_count(entries), 1U);
441 page_count_t level_size = DIV_ROUND_UP(leaf_pages, root_count);
442 block_count_t total_pages = 0;
445 for (height = 0; height < VDO_BLOCK_MAP_TREE_HEIGHT; height++) {
446 block_count_t new_pages;
448 level_size = DIV_ROUND_UP(level_size, VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
449 new_sizes->levels[height] = level_size;
450 new_pages = level_size;
451 if (old_sizes != NULL)
452 new_pages -= old_sizes->levels[height];
453 total_pages += (new_pages * root_count);
460 * encode_recovery_journal_state_7_0() - Encode the state of a recovery journal.
462 * Return: VDO_SUCCESS or an error code.
464 static void encode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
465 struct recovery_journal_state_7_0 state)
467 size_t initial_offset;
469 vdo_encode_header(buffer, offset, &VDO_RECOVERY_JOURNAL_HEADER_7_0);
471 initial_offset = *offset;
472 encode_u64_le(buffer, offset, state.journal_start);
473 encode_u64_le(buffer, offset, state.logical_blocks_used);
474 encode_u64_le(buffer, offset, state.block_map_data_blocks);
476 ASSERT_LOG_ONLY(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
477 "encoded recovery journal component size must match header size");
481 * decode_recovery_journal_state_7_0() - Decode the state of a recovery journal saved in a buffer.
482 * @buffer: The buffer containing the saved state.
483 * @state: A pointer to a recovery journal state to hold the result of a successful decode.
485 * Return: VDO_SUCCESS or an error code.
487 static int __must_check decode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
488 struct recovery_journal_state_7_0 *state)
490 struct header header;
492 size_t initial_offset;
493 sequence_number_t journal_start;
494 block_count_t logical_blocks_used, block_map_data_blocks;
496 vdo_decode_header(buffer, offset, &header);
497 result = vdo_validate_header(&VDO_RECOVERY_JOURNAL_HEADER_7_0, &header, true,
499 if (result != VDO_SUCCESS)
502 initial_offset = *offset;
503 decode_u64_le(buffer, offset, &journal_start);
504 decode_u64_le(buffer, offset, &logical_blocks_used);
505 decode_u64_le(buffer, offset, &block_map_data_blocks);
507 result = ASSERT(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
508 "decoded recovery journal component size must match header size");
509 if (result != UDS_SUCCESS)
512 *state = (struct recovery_journal_state_7_0) {
513 .journal_start = journal_start,
514 .logical_blocks_used = logical_blocks_used,
515 .block_map_data_blocks = block_map_data_blocks,
522 * vdo_get_journal_operation_name() - Get the name of a journal operation.
523 * @operation: The operation to name.
525 * Return: The name of the operation.
527 const char *vdo_get_journal_operation_name(enum journal_operation operation)
530 case VDO_JOURNAL_DATA_REMAPPING:
531 return "data remapping";
533 case VDO_JOURNAL_BLOCK_MAP_REMAPPING:
534 return "block map remapping";
537 return "unknown journal operation";
542 * encode_slab_depot_state_2_0() - Encode the state of a slab depot into a buffer.
544 * Return: UDS_SUCCESS or an error.
546 static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
547 struct slab_depot_state_2_0 state)
549 size_t initial_offset;
551 vdo_encode_header(buffer, offset, &VDO_SLAB_DEPOT_HEADER_2_0);
553 initial_offset = *offset;
554 encode_u64_le(buffer, offset, state.slab_config.slab_blocks);
555 encode_u64_le(buffer, offset, state.slab_config.data_blocks);
556 encode_u64_le(buffer, offset, state.slab_config.reference_count_blocks);
557 encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocks);
558 encode_u64_le(buffer, offset, state.slab_config.slab_journal_flushing_threshold);
559 encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocking_threshold);
560 encode_u64_le(buffer, offset, state.slab_config.slab_journal_scrubbing_threshold);
561 encode_u64_le(buffer, offset, state.first_block);
562 encode_u64_le(buffer, offset, state.last_block);
563 buffer[(*offset)++] = state.zone_count;
565 ASSERT_LOG_ONLY(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
566 "encoded block map component size must match header size");
570 * decode_slab_depot_state_2_0() - Decode slab depot component state version 2.0 from a buffer.
572 * Return: UDS_SUCCESS or an error code.
574 static int decode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
575 struct slab_depot_state_2_0 *state)
577 struct header header;
579 size_t initial_offset;
580 struct slab_config slab_config;
582 physical_block_number_t first_block, last_block;
583 zone_count_t zone_count;
585 vdo_decode_header(buffer, offset, &header);
586 result = vdo_validate_header(&VDO_SLAB_DEPOT_HEADER_2_0, &header, true,
588 if (result != VDO_SUCCESS)
591 initial_offset = *offset;
592 decode_u64_le(buffer, offset, &count);
593 slab_config.slab_blocks = count;
595 decode_u64_le(buffer, offset, &count);
596 slab_config.data_blocks = count;
598 decode_u64_le(buffer, offset, &count);
599 slab_config.reference_count_blocks = count;
601 decode_u64_le(buffer, offset, &count);
602 slab_config.slab_journal_blocks = count;
604 decode_u64_le(buffer, offset, &count);
605 slab_config.slab_journal_flushing_threshold = count;
607 decode_u64_le(buffer, offset, &count);
608 slab_config.slab_journal_blocking_threshold = count;
610 decode_u64_le(buffer, offset, &count);
611 slab_config.slab_journal_scrubbing_threshold = count;
613 decode_u64_le(buffer, offset, &first_block);
614 decode_u64_le(buffer, offset, &last_block);
615 zone_count = buffer[(*offset)++];
617 result = ASSERT(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
618 "decoded slab depot component size must match header size");
619 if (result != UDS_SUCCESS)
622 *state = (struct slab_depot_state_2_0) {
623 .slab_config = slab_config,
624 .first_block = first_block,
625 .last_block = last_block,
626 .zone_count = zone_count,
633 * vdo_configure_slab_depot() - Configure the slab depot.
634 * @partition: The slab depot partition
635 * @slab_config: The configuration of a single slab.
636 * @zone_count: The number of zones the depot will use.
637 * @state: The state structure to be configured.
639 * Configures the slab_depot for the specified storage capacity, finding the number of data blocks
640 * that will fit and still leave room for the depot metadata, then return the saved state for that
643 * Return: VDO_SUCCESS or an error code.
645 int vdo_configure_slab_depot(const struct partition *partition,
646 struct slab_config slab_config, zone_count_t zone_count,
647 struct slab_depot_state_2_0 *state)
649 block_count_t total_slab_blocks, total_data_blocks;
651 physical_block_number_t last_block;
652 block_count_t slab_size = slab_config.slab_blocks;
654 uds_log_debug("slabDepot %s(block_count=%llu, first_block=%llu, slab_size=%llu, zone_count=%u)",
655 __func__, (unsigned long long) partition->count,
656 (unsigned long long) partition->offset,
657 (unsigned long long) slab_size, zone_count);
659 /* We do not allow runt slabs, so we waste up to a slab's worth. */
660 slab_count = (partition->count / slab_size);
664 if (slab_count > MAX_VDO_SLABS)
665 return VDO_TOO_MANY_SLABS;
667 total_slab_blocks = slab_count * slab_config.slab_blocks;
668 total_data_blocks = slab_count * slab_config.data_blocks;
669 last_block = partition->offset + total_slab_blocks;
671 *state = (struct slab_depot_state_2_0) {
672 .slab_config = slab_config,
673 .first_block = partition->offset,
674 .last_block = last_block,
675 .zone_count = zone_count,
678 uds_log_debug("slab_depot last_block=%llu, total_data_blocks=%llu, slab_count=%zu, left_over=%llu",
679 (unsigned long long) last_block,
680 (unsigned long long) total_data_blocks, slab_count,
681 (unsigned long long) (partition->count - (last_block - partition->offset)));
687 * vdo_configure_slab() - Measure and initialize the configuration to use for each slab.
688 * @slab_size: The number of blocks per slab.
689 * @slab_journal_blocks: The number of blocks for the slab journal.
690 * @slab_config: The slab configuration to initialize.
692 * Return: VDO_SUCCESS or an error code.
694 int vdo_configure_slab(block_count_t slab_size, block_count_t slab_journal_blocks,
695 struct slab_config *slab_config)
697 block_count_t ref_blocks, meta_blocks, data_blocks;
698 block_count_t flushing_threshold, remaining, blocking_threshold;
699 block_count_t minimal_extra_space, scrubbing_threshold;
701 if (slab_journal_blocks >= slab_size)
702 return VDO_BAD_CONFIGURATION;
705 * This calculation should technically be a recurrence, but the total number of metadata
706 * blocks is currently less than a single block of ref_counts, so we'd gain at most one
707 * data block in each slab with more iteration.
709 ref_blocks = vdo_get_saved_reference_count_size(slab_size - slab_journal_blocks);
710 meta_blocks = (ref_blocks + slab_journal_blocks);
712 /* Make sure test code hasn't configured slabs to be too small. */
713 if (meta_blocks >= slab_size)
714 return VDO_BAD_CONFIGURATION;
717 * If the slab size is very small, assume this must be a unit test and override the number
718 * of data blocks to be a power of two (wasting blocks in the slab). Many tests need their
719 * data_blocks fields to be the exact capacity of the configured volume, and that used to
720 * fall out since they use a power of two for the number of data blocks, the slab size was
721 * a power of two, and every block in a slab was a data block.
723 * TODO: Try to figure out some way of structuring testParameters and unit tests so this
724 * hack isn't needed without having to edit several unit tests every time the metadata size
725 * changes by one block.
727 data_blocks = slab_size - meta_blocks;
728 if ((slab_size < 1024) && !is_power_of_2(data_blocks))
729 data_blocks = ((block_count_t) 1 << ilog2(data_blocks));
732 * Configure the slab journal thresholds. The flush threshold is 168 of 224 blocks in
733 * production, or 3/4ths, so we use this ratio for all sizes.
735 flushing_threshold = ((slab_journal_blocks * 3) + 3) / 4;
737 * The blocking threshold should be far enough from the flushing threshold to not produce
738 * delays, but far enough from the end of the journal to allow multiple successive recovery
741 remaining = slab_journal_blocks - flushing_threshold;
742 blocking_threshold = flushing_threshold + ((remaining * 5) / 7);
743 /* The scrubbing threshold should be at least 2048 entries before the end of the journal. */
744 minimal_extra_space = 1 + (MAXIMUM_VDO_USER_VIOS / VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK);
745 scrubbing_threshold = blocking_threshold;
746 if (slab_journal_blocks > minimal_extra_space)
747 scrubbing_threshold = slab_journal_blocks - minimal_extra_space;
748 if (blocking_threshold > scrubbing_threshold)
749 blocking_threshold = scrubbing_threshold;
751 *slab_config = (struct slab_config) {
752 .slab_blocks = slab_size,
753 .data_blocks = data_blocks,
754 .reference_count_blocks = ref_blocks,
755 .slab_journal_blocks = slab_journal_blocks,
756 .slab_journal_flushing_threshold = flushing_threshold,
757 .slab_journal_blocking_threshold = blocking_threshold,
758 .slab_journal_scrubbing_threshold = scrubbing_threshold};
763 * vdo_decode_slab_journal_entry() - Decode a slab journal entry.
764 * @block: The journal block holding the entry.
765 * @entry_count: The number of the entry.
767 * Return: The decoded entry.
769 struct slab_journal_entry vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block,
770 journal_entry_count_t entry_count)
772 struct slab_journal_entry entry =
773 vdo_unpack_slab_journal_entry(&block->payload.entries[entry_count]);
775 if (block->header.has_block_map_increments &&
776 ((block->payload.full_entries.entry_types[entry_count / 8] &
777 ((u8) 1 << (entry_count % 8))) != 0))
778 entry.operation = VDO_JOURNAL_BLOCK_MAP_REMAPPING;
784 * allocate_partition() - Allocate a partition and add it to a layout.
785 * @layout: The layout containing the partition.
786 * @id: The id of the partition.
787 * @offset: The offset into the layout at which the partition begins.
788 * @size: The size of the partition in blocks.
790 * Return: VDO_SUCCESS or an error.
792 static int allocate_partition(struct layout *layout, u8 id,
793 physical_block_number_t offset, block_count_t size)
795 struct partition *partition;
798 result = uds_allocate(1, struct partition, __func__, &partition);
799 if (result != UDS_SUCCESS)
803 partition->offset = offset;
804 partition->count = size;
805 partition->next = layout->head;
806 layout->head = partition;
812 * make_partition() - Create a new partition from the beginning or end of the unused space in a
814 * @layout: The layout.
815 * @id: The id of the partition to make.
816 * @size: The number of blocks to carve out; if 0, all remaining space will be used.
817 * @beginning: True if the partition should start at the beginning of the unused space.
819 * Return: A success or error code, particularly VDO_NO_SPACE if there are fewer than size blocks
822 static int __must_check make_partition(struct layout *layout, enum partition_id id,
823 block_count_t size, bool beginning)
826 physical_block_number_t offset;
827 block_count_t free_blocks = layout->last_free - layout->first_free;
830 if (free_blocks == 0)
833 } else if (size > free_blocks) {
837 result = vdo_get_partition(layout, id, NULL);
838 if (result != VDO_UNKNOWN_PARTITION)
839 return VDO_PARTITION_EXISTS;
841 offset = beginning ? layout->first_free : (layout->last_free - size);
843 result = allocate_partition(layout, id, offset, size);
844 if (result != VDO_SUCCESS)
847 layout->num_partitions++;
849 layout->first_free += size;
851 layout->last_free = layout->last_free - size;
857 * vdo_initialize_layout() - Lay out the partitions of a vdo.
858 * @size: The entire size of the vdo.
 * @offset: The start of the layout on the underlying storage in blocks.
860 * @block_map_blocks: The size of the block map partition.
861 * @journal_blocks: The size of the journal partition.
862 * @summary_blocks: The size of the slab summary partition.
863 * @layout: The layout to initialize.
865 * Return: VDO_SUCCESS or an error.
867 int vdo_initialize_layout(block_count_t size, physical_block_number_t offset,
868 block_count_t block_map_blocks, block_count_t journal_blocks,
869 block_count_t summary_blocks, struct layout *layout)
872 block_count_t necessary_size =
873 (offset + block_map_blocks + journal_blocks + summary_blocks);
875 if (necessary_size > size)
876 return uds_log_error_strerror(VDO_NO_SPACE,
877 "Not enough space to make a VDO");
879 *layout = (struct layout) {
882 .first_free = offset,
888 result = make_partition(layout, VDO_BLOCK_MAP_PARTITION, block_map_blocks, true);
889 if (result != VDO_SUCCESS) {
890 vdo_uninitialize_layout(layout);
894 result = make_partition(layout, VDO_SLAB_SUMMARY_PARTITION, summary_blocks,
896 if (result != VDO_SUCCESS) {
897 vdo_uninitialize_layout(layout);
901 result = make_partition(layout, VDO_RECOVERY_JOURNAL_PARTITION, journal_blocks,
903 if (result != VDO_SUCCESS) {
904 vdo_uninitialize_layout(layout);
908 result = make_partition(layout, VDO_SLAB_DEPOT_PARTITION, 0, true);
909 if (result != VDO_SUCCESS)
910 vdo_uninitialize_layout(layout);
916 * vdo_uninitialize_layout() - Clean up a layout.
917 * @layout: The layout to clean up.
919 * All partitions created by this layout become invalid pointers.
921 void vdo_uninitialize_layout(struct layout *layout)
923 while (layout->head != NULL) {
924 struct partition *part = layout->head;
926 layout->head = part->next;
930 memset(layout, 0, sizeof(struct layout));
934 * vdo_get_partition() - Get a partition by id.
935 * @layout: The layout from which to get a partition.
936 * @id: The id of the partition.
937 * @partition_ptr: A pointer to hold the partition.
939 * Return: VDO_SUCCESS or an error.
941 int vdo_get_partition(struct layout *layout, enum partition_id id,
942 struct partition **partition_ptr)
944 struct partition *partition;
946 for (partition = layout->head; partition != NULL; partition = partition->next) {
947 if (partition->id == id) {
948 if (partition_ptr != NULL)
949 *partition_ptr = partition;
954 return VDO_UNKNOWN_PARTITION;
958 * vdo_get_known_partition() - Get a partition by id from a validated layout.
959 * @layout: The layout from which to get a partition.
960 * @id: The id of the partition.
962 * Return: the partition
964 struct partition *vdo_get_known_partition(struct layout *layout, enum partition_id id)
966 struct partition *partition;
967 int result = vdo_get_partition(layout, id, &partition);
969 ASSERT_LOG_ONLY(result == VDO_SUCCESS, "layout has expected partition: %u", id);
974 static void encode_layout(u8 *buffer, size_t *offset, const struct layout *layout)
976 const struct partition *partition;
977 size_t initial_offset;
978 struct header header = VDO_LAYOUT_HEADER_3_0;
980 BUILD_BUG_ON(sizeof(enum partition_id) != sizeof(u8));
981 ASSERT_LOG_ONLY(layout->num_partitions <= U8_MAX,
982 "layout partition count must fit in a byte");
984 vdo_encode_header(buffer, offset, &header);
986 initial_offset = *offset;
987 encode_u64_le(buffer, offset, layout->first_free);
988 encode_u64_le(buffer, offset, layout->last_free);
989 buffer[(*offset)++] = layout->num_partitions;
991 ASSERT_LOG_ONLY(sizeof(struct layout_3_0) == *offset - initial_offset,
992 "encoded size of a layout header must match structure");
994 for (partition = layout->head; partition != NULL; partition = partition->next) {
995 buffer[(*offset)++] = partition->id;
996 encode_u64_le(buffer, offset, partition->offset);
997 /* This field only exists for backwards compatibility */
998 encode_u64_le(buffer, offset, 0);
999 encode_u64_le(buffer, offset, partition->count);
1002 ASSERT_LOG_ONLY(header.size == *offset - initial_offset,
1003 "encoded size of a layout must match header size");
1006 static int decode_layout(u8 *buffer, size_t *offset, physical_block_number_t start,
1007 block_count_t size, struct layout *layout)
1009 struct header header;
1010 struct layout_3_0 layout_header;
1011 struct partition *partition;
1012 size_t initial_offset;
1013 physical_block_number_t first_free, last_free;
1018 vdo_decode_header(buffer, offset, &header);
1019 /* Layout is variable size, so only do a minimum size check here. */
1020 result = vdo_validate_header(&VDO_LAYOUT_HEADER_3_0, &header, false, __func__);
1021 if (result != VDO_SUCCESS)
1024 initial_offset = *offset;
1025 decode_u64_le(buffer, offset, &first_free);
1026 decode_u64_le(buffer, offset, &last_free);
1027 partition_count = buffer[(*offset)++];
1028 layout_header = (struct layout_3_0) {
1029 .first_free = first_free,
1030 .last_free = last_free,
1031 .partition_count = partition_count,
1034 result = ASSERT(sizeof(struct layout_3_0) == *offset - initial_offset,
1035 "decoded size of a layout header must match structure");
1036 if (result != VDO_SUCCESS)
1039 layout->start = start;
1040 layout->size = size;
1041 layout->first_free = layout_header.first_free;
1042 layout->last_free = layout_header.last_free;
1043 layout->num_partitions = layout_header.partition_count;
1045 if (layout->num_partitions > VDO_PARTITION_COUNT) {
1046 return uds_log_error_strerror(VDO_UNKNOWN_PARTITION,
1047 "layout has extra partitions");
1050 for (i = 0; i < layout->num_partitions; i++) {
1052 u64 partition_offset, count;
1054 id = buffer[(*offset)++];
1055 decode_u64_le(buffer, offset, &partition_offset);
1056 *offset += sizeof(u64);
1057 decode_u64_le(buffer, offset, &count);
1059 result = allocate_partition(layout, id, partition_offset, count);
1060 if (result != VDO_SUCCESS) {
1061 vdo_uninitialize_layout(layout);
1066 /* Validate that the layout has all (and only) the required partitions */
1067 for (i = 0; i < VDO_PARTITION_COUNT; i++) {
1068 result = vdo_get_partition(layout, REQUIRED_PARTITIONS[i], &partition);
1069 if (result != VDO_SUCCESS) {
1070 vdo_uninitialize_layout(layout);
1071 return uds_log_error_strerror(result,
1072 "layout is missing required partition %u",
1073 REQUIRED_PARTITIONS[i]);
1076 start += partition->count;
1079 if (start != size) {
1080 vdo_uninitialize_layout(layout);
1081 return uds_log_error_strerror(UDS_BAD_STATE,
1082 "partitions do not cover the layout");
1089 * pack_vdo_config() - Convert a vdo_config to its packed on-disk representation.
1090 * @config: The vdo config to convert.
1092 * Return: The platform-independent representation of the config.
1094 static struct packed_vdo_config pack_vdo_config(struct vdo_config config)
1096 return (struct packed_vdo_config) {
1097 .logical_blocks = __cpu_to_le64(config.logical_blocks),
1098 .physical_blocks = __cpu_to_le64(config.physical_blocks),
1099 .slab_size = __cpu_to_le64(config.slab_size),
1100 .recovery_journal_size = __cpu_to_le64(config.recovery_journal_size),
1101 .slab_journal_blocks = __cpu_to_le64(config.slab_journal_blocks),
1106 * pack_vdo_component() - Convert a vdo_component to its packed on-disk representation.
1107 * @component: The VDO component data to convert.
1109 * Return: The platform-independent representation of the component.
1111 static struct packed_vdo_component_41_0 pack_vdo_component(const struct vdo_component component)
1113 return (struct packed_vdo_component_41_0) {
1114 .state = __cpu_to_le32(component.state),
1115 .complete_recoveries = __cpu_to_le64(component.complete_recoveries),
1116 .read_only_recoveries = __cpu_to_le64(component.read_only_recoveries),
1117 .config = pack_vdo_config(component.config),
1118 .nonce = __cpu_to_le64(component.nonce),
1122 static void encode_vdo_component(u8 *buffer, size_t *offset,
1123 struct vdo_component component)
1125 struct packed_vdo_component_41_0 packed;
1127 encode_version_number(buffer, offset, VDO_COMPONENT_DATA_41_0);
1128 packed = pack_vdo_component(component);
1129 memcpy(buffer + *offset, &packed, sizeof(packed));
1130 *offset += sizeof(packed);
1134 * unpack_vdo_config() - Convert a packed_vdo_config to its native in-memory representation.
1135 * @config: The packed vdo config to convert.
1137 * Return: The native in-memory representation of the vdo config.
1139 static struct vdo_config unpack_vdo_config(struct packed_vdo_config config)
1141 return (struct vdo_config) {
1142 .logical_blocks = __le64_to_cpu(config.logical_blocks),
1143 .physical_blocks = __le64_to_cpu(config.physical_blocks),
1144 .slab_size = __le64_to_cpu(config.slab_size),
1145 .recovery_journal_size = __le64_to_cpu(config.recovery_journal_size),
1146 .slab_journal_blocks = __le64_to_cpu(config.slab_journal_blocks),
1151 * unpack_vdo_component_41_0() - Convert a packed_vdo_component_41_0 to its native in-memory
1153 * @component: The packed vdo component data to convert.
1155 * Return: The native in-memory representation of the component.
1157 static struct vdo_component unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)
1159 return (struct vdo_component) {
1160 .state = __le32_to_cpu(component.state),
1161 .complete_recoveries = __le64_to_cpu(component.complete_recoveries),
1162 .read_only_recoveries = __le64_to_cpu(component.read_only_recoveries),
1163 .config = unpack_vdo_config(component.config),
1164 .nonce = __le64_to_cpu(component.nonce),
1169 * vdo_decode_component() - Decode the component data for the vdo itself out of the super block.
1171 * Return: VDO_SUCCESS or an error.
1173 static int decode_vdo_component(u8 *buffer, size_t *offset, struct vdo_component *component)
1175 struct version_number version;
1176 struct packed_vdo_component_41_0 packed;
1179 decode_version_number(buffer, offset, &version);
1180 result = validate_version(version, VDO_COMPONENT_DATA_41_0,
1181 "VDO component data");
1182 if (result != VDO_SUCCESS)
1185 memcpy(&packed, buffer + *offset, sizeof(packed));
1186 *offset += sizeof(packed);
1187 *component = unpack_vdo_component_41_0(packed);
1192 * vdo_validate_config() - Validate constraints on a VDO config.
1193 * @config: The VDO config.
1194 * @physical_block_count: The minimum block count of the underlying storage.
1195 * @logical_block_count: The expected logical size of the VDO, or 0 if the logical size may be
1198 * Return: A success or error code.
1200 int vdo_validate_config(const struct vdo_config *config,
1201 block_count_t physical_block_count,
1202 block_count_t logical_block_count)
1204 struct slab_config slab_config;
1207 result = ASSERT(config->slab_size > 0, "slab size unspecified");
1208 if (result != UDS_SUCCESS)
1211 result = ASSERT(is_power_of_2(config->slab_size),
1212 "slab size must be a power of two");
1213 if (result != UDS_SUCCESS)
1216 result = ASSERT(config->slab_size <= (1 << MAX_VDO_SLAB_BITS),
1217 "slab size must be less than or equal to 2^%d",
1219 if (result != VDO_SUCCESS)
1222 result = ASSERT(config->slab_journal_blocks >= MINIMUM_VDO_SLAB_JOURNAL_BLOCKS,
1223 "slab journal size meets minimum size");
1224 if (result != UDS_SUCCESS)
1227 result = ASSERT(config->slab_journal_blocks <= config->slab_size,
1228 "slab journal size is within expected bound");
1229 if (result != UDS_SUCCESS)
1232 result = vdo_configure_slab(config->slab_size, config->slab_journal_blocks,
1234 if (result != VDO_SUCCESS)
1237 result = ASSERT((slab_config.data_blocks >= 1),
1238 "slab must be able to hold at least one block");
1239 if (result != UDS_SUCCESS)
1242 result = ASSERT(config->physical_blocks > 0, "physical blocks unspecified");
1243 if (result != UDS_SUCCESS)
1246 result = ASSERT(config->physical_blocks <= MAXIMUM_VDO_PHYSICAL_BLOCKS,
1247 "physical block count %llu exceeds maximum %llu",
1248 (unsigned long long) config->physical_blocks,
1249 (unsigned long long) MAXIMUM_VDO_PHYSICAL_BLOCKS);
1250 if (result != UDS_SUCCESS)
1251 return VDO_OUT_OF_RANGE;
1253 if (physical_block_count != config->physical_blocks) {
1254 uds_log_error("A physical size of %llu blocks was specified, not the %llu blocks configured in the vdo super block",
1255 (unsigned long long) physical_block_count,
1256 (unsigned long long) config->physical_blocks);
1257 return VDO_PARAMETER_MISMATCH;
1260 if (logical_block_count > 0) {
1261 result = ASSERT((config->logical_blocks > 0),
1262 "logical blocks unspecified");
1263 if (result != UDS_SUCCESS)
1266 if (logical_block_count != config->logical_blocks) {
1267 uds_log_error("A logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
1268 (unsigned long long) logical_block_count,
1269 (unsigned long long) config->logical_blocks);
1270 return VDO_PARAMETER_MISMATCH;
1274 result = ASSERT(config->logical_blocks <= MAXIMUM_VDO_LOGICAL_BLOCKS,
1275 "logical blocks too large");
1276 if (result != UDS_SUCCESS)
1279 result = ASSERT(config->recovery_journal_size > 0,
1280 "recovery journal size unspecified");
1281 if (result != UDS_SUCCESS)
1284 result = ASSERT(is_power_of_2(config->recovery_journal_size),
1285 "recovery journal size must be a power of two");
1286 if (result != UDS_SUCCESS)
1293 * vdo_destroy_component_states() - Clean up any allocations in a vdo_component_states.
1294 * @states: The component states to destroy.
1296 void vdo_destroy_component_states(struct vdo_component_states *states)
1301 vdo_uninitialize_layout(&states->layout);
1305 * decode_components() - Decode the components now that we know the component data is a version we
1307 * @buffer: The buffer being decoded.
1308 * @offset: The offset to start decoding from.
1309 * @geometry: The vdo geometry
1310 * @states: An object to hold the successfully decoded state.
1312 * Return: VDO_SUCCESS or an error.
1314 static int __must_check decode_components(u8 *buffer, size_t *offset,
1315 struct volume_geometry *geometry,
1316 struct vdo_component_states *states)
1320 decode_vdo_component(buffer, offset, &states->vdo);
1322 result = decode_layout(buffer, offset, vdo_get_data_region_start(*geometry) + 1,
1323 states->vdo.config.physical_blocks, &states->layout);
1324 if (result != VDO_SUCCESS)
1327 result = decode_recovery_journal_state_7_0(buffer, offset,
1328 &states->recovery_journal);
1329 if (result != VDO_SUCCESS)
1332 result = decode_slab_depot_state_2_0(buffer, offset, &states->slab_depot);
1333 if (result != VDO_SUCCESS)
1336 result = decode_block_map_state_2_0(buffer, offset, &states->block_map);
1337 if (result != VDO_SUCCESS)
1340 ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
1341 "All decoded component data was used");
1346 * vdo_decode_component_states() - Decode the payload of a super block.
1347 * @buffer: The buffer containing the encoded super block contents.
1348 * @geometry: The vdo geometry
1349 * @states: A pointer to hold the decoded states.
1351 * Return: VDO_SUCCESS or an error.
1353 int vdo_decode_component_states(u8 *buffer, struct volume_geometry *geometry,
1354 struct vdo_component_states *states)
1357 size_t offset = VDO_COMPONENT_DATA_OFFSET;
1359 /* This is for backwards compatibility. */
1360 decode_u32_le(buffer, &offset, &states->unused);
1362 /* Check the VDO volume version */
1363 decode_version_number(buffer, &offset, &states->volume_version);
1364 result = validate_version(VDO_VOLUME_VERSION_67_0, states->volume_version,
1366 if (result != VDO_SUCCESS)
1369 result = decode_components(buffer, &offset, geometry, states);
1370 if (result != VDO_SUCCESS)
1371 vdo_uninitialize_layout(&states->layout);
1377 * vdo_validate_component_states() - Validate the decoded super block configuration.
1378 * @states: The state decoded from the super block.
1379 * @geometry_nonce: The nonce from the geometry block.
1380 * @physical_size: The minimum block count of the underlying storage.
1381 * @logical_size: The expected logical size of the VDO, or 0 if the logical size may be
1384 * Return: VDO_SUCCESS or an error if the configuration is invalid.
1386 int vdo_validate_component_states(struct vdo_component_states *states,
1387 nonce_t geometry_nonce, block_count_t physical_size,
1388 block_count_t logical_size)
1390 if (geometry_nonce != states->vdo.nonce) {
1391 return uds_log_error_strerror(VDO_BAD_NONCE,
1392 "Geometry nonce %llu does not match superblock nonce %llu",
1393 (unsigned long long) geometry_nonce,
1394 (unsigned long long) states->vdo.nonce);
1397 return vdo_validate_config(&states->vdo.config, physical_size, logical_size);
1401 * vdo_encode_component_states() - Encode the state of all vdo components in the super block.
1403 static void vdo_encode_component_states(u8 *buffer, size_t *offset,
1404 const struct vdo_component_states *states)
1406 /* This is for backwards compatibility. */
1407 encode_u32_le(buffer, offset, states->unused);
1408 encode_version_number(buffer, offset, states->volume_version);
1409 encode_vdo_component(buffer, offset, states->vdo);
1410 encode_layout(buffer, offset, &states->layout);
1411 encode_recovery_journal_state_7_0(buffer, offset, states->recovery_journal);
1412 encode_slab_depot_state_2_0(buffer, offset, states->slab_depot);
1413 encode_block_map_state_2_0(buffer, offset, states->block_map);
1415 ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
1416 "All super block component data was encoded");
1420 * vdo_encode_super_block() - Encode a super block into its on-disk representation.
1422 void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states)
1425 struct header header = SUPER_BLOCK_HEADER_12_0;
1428 header.size += VDO_COMPONENT_DATA_SIZE;
1429 vdo_encode_header(buffer, &offset, &header);
1430 vdo_encode_component_states(buffer, &offset, states);
1432 checksum = vdo_crc32(buffer, offset);
1433 encode_u32_le(buffer, &offset, checksum);
1436 * Even though the buffer is a full block, to avoid the potential corruption from a torn
1437 * write, the entire encoding must fit in the first sector.
1439 ASSERT_LOG_ONLY(offset <= VDO_SECTOR_SIZE,
1440 "entire superblock must fit in one sector");
1444 * vdo_decode_super_block() - Decode a super block from its on-disk representation.
1446 int vdo_decode_super_block(u8 *buffer)
1448 struct header header;
1450 u32 checksum, saved_checksum;
1453 /* Decode and validate the header. */
1454 vdo_decode_header(buffer, &offset, &header);
1455 result = vdo_validate_header(&SUPER_BLOCK_HEADER_12_0, &header, false, __func__);
1456 if (result != VDO_SUCCESS)
1459 if (header.size > VDO_COMPONENT_DATA_SIZE + sizeof(u32)) {
1461 * We can't check release version or checksum until we know the content size, so we
1462 * have to assume a version mismatch on unexpected values.
1464 return uds_log_error_strerror(VDO_UNSUPPORTED_VERSION,
1465 "super block contents too large: %zu",
1469 /* Skip past the component data for now, to verify the checksum. */
1470 offset += VDO_COMPONENT_DATA_SIZE;
1472 checksum = vdo_crc32(buffer, offset);
1473 decode_u32_le(buffer, &offset, &saved_checksum);
1475 result = ASSERT(offset == VDO_SUPER_BLOCK_FIXED_SIZE + VDO_COMPONENT_DATA_SIZE,
1476 "must have decoded entire superblock payload");
1477 if (result != VDO_SUCCESS)
1480 return ((checksum != saved_checksum) ? VDO_CHECKSUM_MISMATCH : VDO_SUCCESS);