dm vdo: fix sparse warnings about missing statics
[linux-block.git] / drivers / md / dm-vdo / encodings.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright 2023 Red Hat
4  */
5
6 #include "encodings.h"
7
8 #include <linux/log2.h>
9
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "permassert.h"
13
14 #include "constants.h"
15 #include "status-codes.h"
16 #include "types.h"
17
/*
 * The on-disk layout of a geometry block: magic bytes, a packed header, and a checksum covering
 * the bytes that precede it (see vdo_parse_geometry_block()). __packed keeps the struct free of
 * padding so it matches the on-disk encoding exactly.
 */
struct geometry_block {
	char magic_number[VDO_GEOMETRY_MAGIC_NUMBER_SIZE];
	struct packed_header header;
	u32 checksum;
} __packed;
23
/* The expected header for on-disk geometry blocks, version 5.0 (versions > 4 add bio_offset). */
static const struct header GEOMETRY_BLOCK_HEADER_5_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 5,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry),
};
36
/* The expected header for on-disk geometry blocks, version 4.0 and earlier. */
static const struct header GEOMETRY_BLOCK_HEADER_4_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 4,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry_4_0),
};
49
/* The magic bytes expected at the start of a geometry block; the +1 holds the string's NUL. */
const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1] = "dmvdo001";

/*
 * The expected on-disk size of a version 4.1 block map page header; checked against
 * sizeof(struct block_map_page_header) in vdo_validate_block_map_page().
 */
enum {
	PAGE_HEADER_4_1_SIZE = 8 + 8 + 8 + 1 + 1 + 1 + 1,
};

/* The block map page format version written by vdo_format_block_map_page(). */
static const struct version_number BLOCK_MAP_4_1 = {
	.major_version = 4,
	.minor_version = 1,
};
60
/* The expected header for version 2.0 block map component state. */
const struct header VDO_BLOCK_MAP_HEADER_2_0 = {
	.id = VDO_BLOCK_MAP,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct block_map_state_2_0),
};
69
/* The expected header for version 7.0 recovery journal component state. */
const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0 = {
	.id = VDO_RECOVERY_JOURNAL,
	.version = {
			.major_version = 7,
			.minor_version = 0,
		},
	.size = sizeof(struct recovery_journal_state_7_0),
};
78
/* The expected header for version 2.0 slab depot component state. */
const struct header VDO_SLAB_DEPOT_HEADER_2_0 = {
	.id = VDO_SLAB_DEPOT,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct slab_depot_state_2_0),
};
87
/* The expected header for version 3.0 layout component state. */
static const struct header VDO_LAYOUT_HEADER_3_0 = {
	.id = VDO_LAYOUT,
	.version = {
		.major_version = 3,
		.minor_version = 0,
	},
	.size = sizeof(struct layout_3_0) + (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT),
};

/* The partitions which must be present in a decoded layout. */
static const enum partition_id REQUIRED_PARTITIONS[] = {
	VDO_BLOCK_MAP_PARTITION,
	VDO_SLAB_DEPOT_PARTITION,
	VDO_RECOVERY_JOURNAL_PARTITION,
	VDO_SLAB_SUMMARY_PARTITION,
};
103
/*
 * The current version for the data encoded in the super block. This must be changed any time there
 * is a change to encoding of the component data of any VDO component.
 */
static const struct version_number VDO_COMPONENT_DATA_41_0 = {
	.major_version = 41,
	.minor_version = 0,
};

/* The current version of the volume format as a whole. */
const struct version_number VDO_VOLUME_VERSION_67_0 = {
	.major_version = 67,
	.minor_version = 0,
};

/* The expected header for an encoded super block. */
static const struct header SUPER_BLOCK_HEADER_12_0 = {
	.id = VDO_SUPER_BLOCK,
	.version = {
			.major_version = 12,
			.minor_version = 0,
		},

	/* This is the minimum size, if the super block contains no components. */
	.size = VDO_SUPER_BLOCK_FIXED_SIZE - VDO_ENCODED_HEADER_SIZE,
};
128
129 /**
130  * validate_version() - Check whether a version matches an expected version.
131  * @expected_version: The expected version.
132  * @actual_version: The version being validated.
133  * @component_name: The name of the component or the calling function (for error logging).
134  *
135  * Logs an error describing a mismatch.
136  *
137  * Return: VDO_SUCCESS             if the versions are the same,
138  *         VDO_UNSUPPORTED_VERSION if the versions don't match.
139  */
140 static int __must_check validate_version(struct version_number expected_version,
141                                          struct version_number actual_version,
142                                          const char *component_name)
143 {
144         if (!vdo_are_same_version(expected_version, actual_version)) {
145                 return uds_log_error_strerror(VDO_UNSUPPORTED_VERSION,
146                                               "%s version mismatch, expected %d.%d, got %d.%d",
147                                               component_name,
148                                               expected_version.major_version,
149                                               expected_version.minor_version,
150                                               actual_version.major_version,
151                                               actual_version.minor_version);
152         }
153
154         return VDO_SUCCESS;
155 }
156
157 /**
158  * vdo_validate_header() - Check whether a header matches expectations.
159  * @expected_header: The expected header.
160  * @actual_header: The header being validated.
161  * @exact_size: If true, the size fields of the two headers must be the same, otherwise it is
162  *              required that actual_header.size >= expected_header.size.
163  * @name: The name of the component or the calling function (for error logging).
164  *
165  * Logs an error describing the first mismatch found.
166  *
167  * Return: VDO_SUCCESS             if the header meets expectations,
168  *         VDO_INCORRECT_COMPONENT if the component ids don't match,
169  *         VDO_UNSUPPORTED_VERSION if the versions or sizes don't match.
170  */
171 int vdo_validate_header(const struct header *expected_header,
172                         const struct header *actual_header, bool exact_size,
173                         const char *name)
174 {
175         int result;
176
177         if (expected_header->id != actual_header->id) {
178                 return uds_log_error_strerror(VDO_INCORRECT_COMPONENT,
179                                               "%s ID mismatch, expected %d, got %d",
180                                               name, expected_header->id,
181                                               actual_header->id);
182         }
183
184         result = validate_version(expected_header->version, actual_header->version,
185                                   name);
186         if (result != VDO_SUCCESS)
187                 return result;
188
189         if ((expected_header->size > actual_header->size) ||
190             (exact_size && (expected_header->size < actual_header->size))) {
191                 return uds_log_error_strerror(VDO_UNSUPPORTED_VERSION,
192                                               "%s size mismatch, expected %zu, got %zu",
193                                               name, expected_header->size,
194                                               actual_header->size);
195         }
196
197         return VDO_SUCCESS;
198 }
199
200 static void encode_version_number(u8 *buffer, size_t *offset,
201                                   struct version_number version)
202 {
203         struct packed_version_number packed = vdo_pack_version_number(version);
204
205         memcpy(buffer + *offset, &packed, sizeof(packed));
206         *offset += sizeof(packed);
207 }
208
209 void vdo_encode_header(u8 *buffer, size_t *offset, const struct header *header)
210 {
211         struct packed_header packed = vdo_pack_header(header);
212
213         memcpy(buffer + *offset, &packed, sizeof(packed));
214         *offset += sizeof(packed);
215 }
216
217 static void decode_version_number(u8 *buffer, size_t *offset,
218                                   struct version_number *version)
219 {
220         struct packed_version_number packed;
221
222         memcpy(&packed, buffer + *offset, sizeof(packed));
223         *offset += sizeof(packed);
224         *version = vdo_unpack_version_number(packed);
225 }
226
227 void vdo_decode_header(u8 *buffer, size_t *offset, struct header *header)
228 {
229         struct packed_header packed;
230
231         memcpy(&packed, buffer + *offset, sizeof(packed));
232         *offset += sizeof(packed);
233
234         *header = vdo_unpack_header(&packed);
235 }
236
/**
 * decode_volume_geometry() - Decode the on-disk representation of a volume geometry from a buffer.
 * @buffer: A buffer to decode from.
 * @offset: The offset in the buffer at which to decode; advanced past the decoded fields.
 * @geometry: The structure to receive the decoded fields.
 * @version: The geometry block version to decode.
 */
static void decode_volume_geometry(u8 *buffer, size_t *offset,
				   struct volume_geometry *geometry, u32 version)
{
	u32 unused, mem;
	enum volume_region_id id;
	nonce_t nonce;
	block_count_t bio_offset = 0;
	bool sparse;

	/* This is for backwards compatibility. */
	decode_u32_le(buffer, offset, &unused);
	geometry->unused = unused;

	decode_u64_le(buffer, offset, &nonce);
	geometry->nonce = nonce;

	memcpy((unsigned char *) &geometry->uuid, buffer + *offset, sizeof(uuid_t));
	*offset += sizeof(uuid_t);

	/* Only geometry versions after 4 encode a bio_offset; older versions default to 0. */
	if (version > 4)
		decode_u64_le(buffer, offset, &bio_offset);
	geometry->bio_offset = bio_offset;

	for (id = 0; id < VDO_VOLUME_REGION_COUNT; id++) {
		physical_block_number_t start_block;
		enum volume_region_id saved_id;

		/*
		 * NOTE(review): this decodes a u32 into an enum lvalue, which relies on the
		 * enum being 32 bits wide -- confirm against decode_u32_le()'s pointer type.
		 */
		decode_u32_le(buffer, offset, &saved_id);
		decode_u64_le(buffer, offset, &start_block);

		geometry->regions[id] = (struct volume_region) {
			.id = saved_id,
			.start_block = start_block,
		};
	}

	decode_u32_le(buffer, offset, &mem);
	/* Skip 4 bytes here -- presumably an obsolete index config field; verify against format. */
	*offset += sizeof(u32);
	sparse = buffer[(*offset)++];

	geometry->index_config = (struct index_config) {
		.mem = mem,
		.sparse = sparse,
	};
}
289
/**
 * vdo_parse_geometry_block() - Decode and validate an encoded geometry block.
 * @block: The encoded geometry block.
 * @geometry: The structure to receive the decoded fields.
 *
 * Return: VDO_SUCCESS on success; VDO_BAD_MAGIC if the magic bytes don't match;
 *         VDO_CHECKSUM_MISMATCH if the recorded checksum doesn't match the computed one;
 *         or another error if the header fails validation.
 */
int __must_check vdo_parse_geometry_block(u8 *block, struct volume_geometry *geometry)
{
	u32 checksum, saved_checksum;
	struct header header;
	size_t offset = 0;
	int result;

	if (memcmp(block, VDO_GEOMETRY_MAGIC_NUMBER, VDO_GEOMETRY_MAGIC_NUMBER_SIZE) != 0)
		return VDO_BAD_MAGIC;
	offset += VDO_GEOMETRY_MAGIC_NUMBER_SIZE;

	vdo_decode_header(block, &offset, &header);
	/* Versions 4 and earlier use the smaller 4.0 encoding (no bio_offset field). */
	if (header.version.major_version <= 4) {
		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_4_0, &header,
					     true, __func__);
	} else {
		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_5_0, &header,
					     true, __func__);
	}
	if (result != VDO_SUCCESS)
		return result;

	decode_volume_geometry(block, &offset, geometry, header.version.major_version);

	result = ASSERT(header.size == offset + sizeof(u32),
			"should have decoded up to the geometry checksum");
	if (result != VDO_SUCCESS)
		return result;

	/* Decode and verify the checksum, which covers everything that precedes it. */
	checksum = vdo_crc32(block, offset);
	decode_u32_le(block, &offset, &saved_checksum);

	return ((checksum == saved_checksum) ? VDO_SUCCESS : VDO_CHECKSUM_MISMATCH);
}
330
331 struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce,
332                                                  physical_block_number_t pbn,
333                                                  bool initialized)
334 {
335         struct block_map_page *page = buffer;
336
337         memset(buffer, 0, VDO_BLOCK_SIZE);
338         page->version = vdo_pack_version_number(BLOCK_MAP_4_1);
339         page->header.nonce = __cpu_to_le64(nonce);
340         page->header.pbn = __cpu_to_le64(pbn);
341         page->header.initialized = initialized;
342         return page;
343 }
344
345 enum block_map_page_validity vdo_validate_block_map_page(struct block_map_page *page,
346                                                          nonce_t nonce,
347                                                          physical_block_number_t pbn)
348 {
349         BUILD_BUG_ON(sizeof(struct block_map_page_header) != PAGE_HEADER_4_1_SIZE);
350
351         if (!vdo_are_same_version(BLOCK_MAP_4_1,
352                                   vdo_unpack_version_number(page->version)) ||
353             !page->header.initialized || (nonce != __le64_to_cpu(page->header.nonce)))
354                 return VDO_BLOCK_MAP_PAGE_INVALID;
355
356         if (pbn != vdo_get_block_map_page_pbn(page))
357                 return VDO_BLOCK_MAP_PAGE_BAD;
358
359         return VDO_BLOCK_MAP_PAGE_VALID;
360 }
361
362 static int decode_block_map_state_2_0(u8 *buffer, size_t *offset,
363                                       struct block_map_state_2_0 *state)
364 {
365         size_t initial_offset;
366         block_count_t flat_page_count, root_count;
367         physical_block_number_t flat_page_origin, root_origin;
368         struct header header;
369         int result;
370
371         vdo_decode_header(buffer, offset, &header);
372         result = vdo_validate_header(&VDO_BLOCK_MAP_HEADER_2_0, &header, true, __func__);
373         if (result != VDO_SUCCESS)
374                 return result;
375
376         initial_offset = *offset;
377
378         decode_u64_le(buffer, offset, &flat_page_origin);
379         result = ASSERT(flat_page_origin == VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
380                         "Flat page origin must be %u (recorded as %llu)",
381                         VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
382                         (unsigned long long) state->flat_page_origin);
383         if (result != UDS_SUCCESS)
384                 return result;
385
386         decode_u64_le(buffer, offset, &flat_page_count);
387         result = ASSERT(flat_page_count == 0,
388                         "Flat page count must be 0 (recorded as %llu)",
389                         (unsigned long long) state->flat_page_count);
390         if (result != UDS_SUCCESS)
391                 return result;
392
393         decode_u64_le(buffer, offset, &root_origin);
394         decode_u64_le(buffer, offset, &root_count);
395
396         result = ASSERT(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
397                         "decoded block map component size must match header size");
398         if (result != VDO_SUCCESS)
399                 return result;
400
401         *state = (struct block_map_state_2_0) {
402                 .flat_page_origin = flat_page_origin,
403                 .flat_page_count = flat_page_count,
404                 .root_origin = root_origin,
405                 .root_count = root_count,
406         };
407
408         return VDO_SUCCESS;
409 }
410
411 static void encode_block_map_state_2_0(u8 *buffer, size_t *offset,
412                                        struct block_map_state_2_0 state)
413 {
414         size_t initial_offset;
415
416         vdo_encode_header(buffer, offset, &VDO_BLOCK_MAP_HEADER_2_0);
417
418         initial_offset = *offset;
419         encode_u64_le(buffer, offset, state.flat_page_origin);
420         encode_u64_le(buffer, offset, state.flat_page_count);
421         encode_u64_le(buffer, offset, state.root_origin);
422         encode_u64_le(buffer, offset, state.root_count);
423
424         ASSERT_LOG_ONLY(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
425                         "encoded block map component size must match header size");
426 }
427
/**
 * vdo_compute_new_forest_pages() - Compute the number of pages which must be allocated at each
 *                                  level in order to grow the forest to a new number of entries.
 * @root_count: The number of roots the forest is divided among.
 * @old_sizes: The level sizes of the existing forest, or NULL if there is no old forest.
 * @entries: The new number of entries the block map must address.
 * @new_sizes: The structure to receive the level sizes of the new forest.
 *
 * Return: The total number of non-leaf pages required.
 */
block_count_t vdo_compute_new_forest_pages(root_count_t root_count,
					   struct boundary *old_sizes,
					   block_count_t entries,
					   struct boundary *new_sizes)
{
	/* Each root covers an equal share of the leaf pages (at least one in total). */
	page_count_t leaf_pages = max(vdo_compute_block_map_page_count(entries), 1U);
	page_count_t level_size = DIV_ROUND_UP(leaf_pages, root_count);
	block_count_t total_pages = 0;
	height_t height;

	for (height = 0; height < VDO_BLOCK_MAP_TREE_HEIGHT; height++) {
		block_count_t new_pages;

		/* Each level needs one page per VDO_BLOCK_MAP_ENTRIES_PER_PAGE pages below it. */
		level_size = DIV_ROUND_UP(level_size, VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
		new_sizes->levels[height] = level_size;
		new_pages = level_size;
		/* Only count pages beyond what the old forest already has at this level. */
		if (old_sizes != NULL)
			new_pages -= old_sizes->levels[height];
		total_pages += (new_pages * root_count);
	}

	return total_pages;
}
458
/**
 * encode_recovery_journal_state_7_0() - Encode the state of a recovery journal into a buffer.
 * @buffer: The buffer to encode into.
 * @offset: The offset in the buffer at which to encode; advanced past the encoded state.
 * @state: The recovery journal state to encode.
 */
static void encode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
					      struct recovery_journal_state_7_0 state)
{
	size_t initial_offset;

	vdo_encode_header(buffer, offset, &VDO_RECOVERY_JOURNAL_HEADER_7_0);

	initial_offset = *offset;
	encode_u64_le(buffer, offset, state.journal_start);
	encode_u64_le(buffer, offset, state.logical_blocks_used);
	encode_u64_le(buffer, offset, state.block_map_data_blocks);

	/* The encoded payload must match the size declared in the header. */
	ASSERT_LOG_ONLY(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
			"encoded recovery journal component size must match header size");
}
479
/**
 * decode_recovery_journal_state_7_0() - Decode the state of a recovery journal saved in a buffer.
 * @buffer: The buffer containing the saved state.
 * @offset: The offset in the buffer at which to decode; advanced past the decoded state.
 * @state: A pointer to a recovery journal state to hold the result of a successful decode.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int __must_check decode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
							  struct recovery_journal_state_7_0 *state)
{
	struct header header;
	int result;
	size_t initial_offset;
	sequence_number_t journal_start;
	block_count_t logical_blocks_used, block_map_data_blocks;

	vdo_decode_header(buffer, offset, &header);
	result = vdo_validate_header(&VDO_RECOVERY_JOURNAL_HEADER_7_0, &header, true,
				     __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;
	decode_u64_le(buffer, offset, &journal_start);
	decode_u64_le(buffer, offset, &logical_blocks_used);
	decode_u64_le(buffer, offset, &block_map_data_blocks);

	/* The decoded payload must match the size declared in the header. */
	result = ASSERT(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
			"decoded recovery journal component size must match header size");
	if (result != UDS_SUCCESS)
		return result;

	/* Only write the output parameter once the decode has fully succeeded. */
	*state = (struct recovery_journal_state_7_0) {
		.journal_start = journal_start,
		.logical_blocks_used = logical_blocks_used,
		.block_map_data_blocks = block_map_data_blocks,
	};

	return VDO_SUCCESS;
}
520
521 /**
522  * vdo_get_journal_operation_name() - Get the name of a journal operation.
523  * @operation: The operation to name.
524  *
525  * Return: The name of the operation.
526  */
527 const char *vdo_get_journal_operation_name(enum journal_operation operation)
528 {
529         switch (operation) {
530         case VDO_JOURNAL_DATA_REMAPPING:
531                 return "data remapping";
532
533         case VDO_JOURNAL_BLOCK_MAP_REMAPPING:
534                 return "block map remapping";
535
536         default:
537                 return "unknown journal operation";
538         }
539 }
540
541 /**
542  * encode_slab_depot_state_2_0() - Encode the state of a slab depot into a buffer.
543  *
544  * Return: UDS_SUCCESS or an error.
545  */
546 static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
547                                         struct slab_depot_state_2_0 state)
548 {
549         size_t initial_offset;
550
551         vdo_encode_header(buffer, offset, &VDO_SLAB_DEPOT_HEADER_2_0);
552
553         initial_offset = *offset;
554         encode_u64_le(buffer, offset, state.slab_config.slab_blocks);
555         encode_u64_le(buffer, offset, state.slab_config.data_blocks);
556         encode_u64_le(buffer, offset, state.slab_config.reference_count_blocks);
557         encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocks);
558         encode_u64_le(buffer, offset, state.slab_config.slab_journal_flushing_threshold);
559         encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocking_threshold);
560         encode_u64_le(buffer, offset, state.slab_config.slab_journal_scrubbing_threshold);
561         encode_u64_le(buffer, offset, state.first_block);
562         encode_u64_le(buffer, offset, state.last_block);
563         buffer[(*offset)++] = state.zone_count;
564
565         ASSERT_LOG_ONLY(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
566                         "encoded block map component size must match header size");
567 }
568
/**
 * decode_slab_depot_state_2_0() - Decode slab depot component state version 2.0 from a buffer.
 * @buffer: The buffer containing the saved state.
 * @offset: The offset in the buffer at which to decode; advanced past the decoded state.
 * @state: A slab depot state to hold the result of a successful decode.
 *
 * Return: UDS_SUCCESS or an error code.
 */
static int decode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
				       struct slab_depot_state_2_0 *state)
{
	struct header header;
	int result;
	size_t initial_offset;
	struct slab_config slab_config;
	block_count_t count;
	physical_block_number_t first_block, last_block;
	zone_count_t zone_count;

	vdo_decode_header(buffer, offset, &header);
	result = vdo_validate_header(&VDO_SLAB_DEPOT_HEADER_2_0, &header, true,
				     __func__);
	if (result != VDO_SUCCESS)
		return result;

	/* Decode the fields in the same order encode_slab_depot_state_2_0() writes them. */
	initial_offset = *offset;
	decode_u64_le(buffer, offset, &count);
	slab_config.slab_blocks = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.data_blocks = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.reference_count_blocks = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.slab_journal_blocks = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.slab_journal_flushing_threshold = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.slab_journal_blocking_threshold = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.slab_journal_scrubbing_threshold = count;

	decode_u64_le(buffer, offset, &first_block);
	decode_u64_le(buffer, offset, &last_block);
	/* The zone count is encoded as a single byte. */
	zone_count = buffer[(*offset)++];

	result = ASSERT(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
			"decoded slab depot component size must match header size");
	if (result != UDS_SUCCESS)
		return result;

	/* Only write the output parameter once the decode has fully succeeded. */
	*state = (struct slab_depot_state_2_0) {
		.slab_config = slab_config,
		.first_block = first_block,
		.last_block = last_block,
		.zone_count = zone_count,
	};

	return VDO_SUCCESS;
}
631
/**
 * vdo_configure_slab_depot() - Configure the slab depot.
 * @partition: The slab depot partition
 * @slab_config: The configuration of a single slab.
 * @zone_count: The number of zones the depot will use.
 * @state: The state structure to be configured.
 *
 * Configures the slab_depot for the specified storage capacity, finding the number of data blocks
 * that will fit and still leave room for the depot metadata, then return the saved state for that
 * configuration.
 *
 * Return: VDO_SUCCESS or an error code.
 */
int vdo_configure_slab_depot(const struct partition *partition,
			     struct slab_config slab_config, zone_count_t zone_count,
			     struct slab_depot_state_2_0 *state)
{
	block_count_t total_slab_blocks, total_data_blocks;
	size_t slab_count;
	physical_block_number_t last_block;
	block_count_t slab_size = slab_config.slab_blocks;

	uds_log_debug("slabDepot %s(block_count=%llu, first_block=%llu, slab_size=%llu, zone_count=%u)",
		      __func__, (unsigned long long) partition->count,
		      (unsigned long long) partition->offset,
		      (unsigned long long) slab_size, zone_count);

	/* We do not allow runt slabs, so we waste up to a slab's worth. */
	slab_count = (partition->count / slab_size);
	if (slab_count == 0)
		return VDO_NO_SPACE;

	if (slab_count > MAX_VDO_SLABS)
		return VDO_TOO_MANY_SLABS;

	total_slab_blocks = slab_count * slab_config.slab_blocks;
	total_data_blocks = slab_count * slab_config.data_blocks;
	/* The depot occupies [partition->offset, last_block); any remainder is left over. */
	last_block = partition->offset + total_slab_blocks;

	*state = (struct slab_depot_state_2_0) {
		.slab_config = slab_config,
		.first_block = partition->offset,
		.last_block = last_block,
		.zone_count = zone_count,
	};

	uds_log_debug("slab_depot last_block=%llu, total_data_blocks=%llu, slab_count=%zu, left_over=%llu",
		      (unsigned long long) last_block,
		      (unsigned long long) total_data_blocks, slab_count,
		      (unsigned long long) (partition->count - (last_block - partition->offset)));

	return VDO_SUCCESS;
}
685
/**
 * vdo_configure_slab() - Measure and initialize the configuration to use for each slab.
 * @slab_size: The number of blocks per slab.
 * @slab_journal_blocks: The number of blocks for the slab journal.
 * @slab_config: The slab configuration to initialize.
 *
 * Return: VDO_SUCCESS, or VDO_BAD_CONFIGURATION if the journal or metadata doesn't fit in the
 *         slab.
 */
int vdo_configure_slab(block_count_t slab_size, block_count_t slab_journal_blocks,
		       struct slab_config *slab_config)
{
	block_count_t ref_blocks, meta_blocks, data_blocks;
	block_count_t flushing_threshold, remaining, blocking_threshold;
	block_count_t minimal_extra_space, scrubbing_threshold;

	if (slab_journal_blocks >= slab_size)
		return VDO_BAD_CONFIGURATION;

	/*
	 * This calculation should technically be a recurrence, but the total number of metadata
	 * blocks is currently less than a single block of ref_counts, so we'd gain at most one
	 * data block in each slab with more iteration.
	 */
	ref_blocks = vdo_get_saved_reference_count_size(slab_size - slab_journal_blocks);
	meta_blocks = (ref_blocks + slab_journal_blocks);

	/* Make sure test code hasn't configured slabs to be too small. */
	if (meta_blocks >= slab_size)
		return VDO_BAD_CONFIGURATION;

	/*
	 * If the slab size is very small, assume this must be a unit test and override the number
	 * of data blocks to be a power of two (wasting blocks in the slab). Many tests need their
	 * data_blocks fields to be the exact capacity of the configured volume, and that used to
	 * fall out since they use a power of two for the number of data blocks, the slab size was
	 * a power of two, and every block in a slab was a data block.
	 *
	 * TODO: Try to figure out some way of structuring testParameters and unit tests so this
	 * hack isn't needed without having to edit several unit tests every time the metadata size
	 * changes by one block.
	 */
	data_blocks = slab_size - meta_blocks;
	if ((slab_size < 1024) && !is_power_of_2(data_blocks))
		data_blocks = ((block_count_t) 1 << ilog2(data_blocks));

	/*
	 * Configure the slab journal thresholds. The flush threshold is 168 of 224 blocks in
	 * production, or 3/4ths, so we use this ratio for all sizes.
	 */
	flushing_threshold = ((slab_journal_blocks * 3) + 3) / 4;
	/*
	 * The blocking threshold should be far enough from the flushing threshold to not produce
	 * delays, but far enough from the end of the journal to allow multiple successive recovery
	 * failures.
	 */
	remaining = slab_journal_blocks - flushing_threshold;
	blocking_threshold = flushing_threshold + ((remaining * 5) / 7);
	/* The scrubbing threshold should be at least 2048 entries before the end of the journal. */
	minimal_extra_space = 1 + (MAXIMUM_VDO_USER_VIOS / VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK);
	scrubbing_threshold = blocking_threshold;
	if (slab_journal_blocks > minimal_extra_space)
		scrubbing_threshold = slab_journal_blocks - minimal_extra_space;
	/* Maintain the invariant: flushing <= blocking <= scrubbing. */
	if (blocking_threshold > scrubbing_threshold)
		blocking_threshold = scrubbing_threshold;

	*slab_config = (struct slab_config) {
		.slab_blocks = slab_size,
		.data_blocks = data_blocks,
		.reference_count_blocks = ref_blocks,
		.slab_journal_blocks = slab_journal_blocks,
		.slab_journal_flushing_threshold = flushing_threshold,
		.slab_journal_blocking_threshold = blocking_threshold,
		.slab_journal_scrubbing_threshold = scrubbing_threshold};
	return VDO_SUCCESS;
}
761
762 /**
763  * vdo_decode_slab_journal_entry() - Decode a slab journal entry.
764  * @block: The journal block holding the entry.
765  * @entry_count: The number of the entry.
766  *
767  * Return: The decoded entry.
768  */
769 struct slab_journal_entry vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block,
770                                                         journal_entry_count_t entry_count)
771 {
772         struct slab_journal_entry entry =
773                 vdo_unpack_slab_journal_entry(&block->payload.entries[entry_count]);
774
775         if (block->header.has_block_map_increments &&
776             ((block->payload.full_entries.entry_types[entry_count / 8] &
777               ((u8) 1 << (entry_count % 8))) != 0))
778                 entry.operation = VDO_JOURNAL_BLOCK_MAP_REMAPPING;
779
780         return entry;
781 }
782
783 /**
784  * allocate_partition() - Allocate a partition and add it to a layout.
785  * @layout: The layout containing the partition.
786  * @id: The id of the partition.
787  * @offset: The offset into the layout at which the partition begins.
788  * @size: The size of the partition in blocks.
789  *
790  * Return: VDO_SUCCESS or an error.
791  */
792 static int allocate_partition(struct layout *layout, u8 id,
793                               physical_block_number_t offset, block_count_t size)
794 {
795         struct partition *partition;
796         int result;
797
798         result = uds_allocate(1, struct partition, __func__, &partition);
799         if (result != UDS_SUCCESS)
800                 return result;
801
802         partition->id = id;
803         partition->offset = offset;
804         partition->count = size;
805         partition->next = layout->head;
806         layout->head = partition;
807
808         return VDO_SUCCESS;
809 }
810
811 /**
812  * make_partition() - Create a new partition from the beginning or end of the unused space in a
813  *                    layout.
814  * @layout: The layout.
815  * @id: The id of the partition to make.
816  * @size: The number of blocks to carve out; if 0, all remaining space will be used.
817  * @beginning: True if the partition should start at the beginning of the unused space.
818  *
819  * Return: A success or error code, particularly VDO_NO_SPACE if there are fewer than size blocks
820  *         remaining.
821  */
822 static int __must_check make_partition(struct layout *layout, enum partition_id id,
823                                        block_count_t size, bool beginning)
824 {
825         int result;
826         physical_block_number_t offset;
827         block_count_t free_blocks = layout->last_free - layout->first_free;
828
829         if (size == 0) {
830                 if (free_blocks == 0)
831                         return VDO_NO_SPACE;
832                 size = free_blocks;
833         } else if (size > free_blocks) {
834                 return VDO_NO_SPACE;
835         }
836
837         result = vdo_get_partition(layout, id, NULL);
838         if (result != VDO_UNKNOWN_PARTITION)
839                 return VDO_PARTITION_EXISTS;
840
841         offset = beginning ? layout->first_free : (layout->last_free - size);
842
843         result = allocate_partition(layout, id, offset, size);
844         if (result != VDO_SUCCESS)
845                 return result;
846
847         layout->num_partitions++;
848         if (beginning)
849                 layout->first_free += size;
850         else
851                 layout->last_free = layout->last_free - size;
852
853         return VDO_SUCCESS;
854 }
855
856 /**
857  * vdo_initialize_layout() - Lay out the partitions of a vdo.
858  * @size: The entire size of the vdo.
859  * @origin: The start of the layout on the underlying storage in blocks.
860  * @block_map_blocks: The size of the block map partition.
861  * @journal_blocks: The size of the journal partition.
862  * @summary_blocks: The size of the slab summary partition.
863  * @layout: The layout to initialize.
864  *
865  * Return: VDO_SUCCESS or an error.
866  */
867 int vdo_initialize_layout(block_count_t size, physical_block_number_t offset,
868                           block_count_t block_map_blocks, block_count_t journal_blocks,
869                           block_count_t summary_blocks, struct layout *layout)
870 {
871         int result;
872         block_count_t necessary_size =
873                 (offset + block_map_blocks + journal_blocks + summary_blocks);
874
875         if (necessary_size > size)
876                 return uds_log_error_strerror(VDO_NO_SPACE,
877                                               "Not enough space to make a VDO");
878
879         *layout = (struct layout) {
880                 .start = offset,
881                 .size = size,
882                 .first_free = offset,
883                 .last_free = size,
884                 .num_partitions = 0,
885                 .head = NULL,
886         };
887
888         result = make_partition(layout, VDO_BLOCK_MAP_PARTITION, block_map_blocks, true);
889         if (result != VDO_SUCCESS) {
890                 vdo_uninitialize_layout(layout);
891                 return result;
892         }
893
894         result = make_partition(layout, VDO_SLAB_SUMMARY_PARTITION, summary_blocks,
895                                 false);
896         if (result != VDO_SUCCESS) {
897                 vdo_uninitialize_layout(layout);
898                 return result;
899         }
900
901         result = make_partition(layout, VDO_RECOVERY_JOURNAL_PARTITION, journal_blocks,
902                                 false);
903         if (result != VDO_SUCCESS) {
904                 vdo_uninitialize_layout(layout);
905                 return result;
906         }
907
908         result = make_partition(layout, VDO_SLAB_DEPOT_PARTITION, 0, true);
909         if (result != VDO_SUCCESS)
910                 vdo_uninitialize_layout(layout);
911
912         return result;
913 }
914
915 /**
916  * vdo_uninitialize_layout() - Clean up a layout.
917  * @layout: The layout to clean up.
918  *
919  * All partitions created by this layout become invalid pointers.
920  */
921 void vdo_uninitialize_layout(struct layout *layout)
922 {
923         while (layout->head != NULL) {
924                 struct partition *part = layout->head;
925
926                 layout->head = part->next;
927                 uds_free(part);
928         }
929
930         memset(layout, 0, sizeof(struct layout));
931 }
932
933 /**
934  * vdo_get_partition() - Get a partition by id.
935  * @layout: The layout from which to get a partition.
936  * @id: The id of the partition.
937  * @partition_ptr: A pointer to hold the partition.
938  *
939  * Return: VDO_SUCCESS or an error.
940  */
941 int vdo_get_partition(struct layout *layout, enum partition_id id,
942                       struct partition **partition_ptr)
943 {
944         struct partition *partition;
945
946         for (partition = layout->head; partition != NULL; partition = partition->next) {
947                 if (partition->id == id) {
948                         if (partition_ptr != NULL)
949                                 *partition_ptr = partition;
950                         return VDO_SUCCESS;
951                 }
952         }
953
954         return VDO_UNKNOWN_PARTITION;
955 }
956
957 /**
958  * vdo_get_known_partition() - Get a partition by id from a validated layout.
959  * @layout: The layout from which to get a partition.
960  * @id: The id of the partition.
961  *
962  * Return: the partition
963  */
964 struct partition *vdo_get_known_partition(struct layout *layout, enum partition_id id)
965 {
966         struct partition *partition;
967         int result = vdo_get_partition(layout, id, &partition);
968
969         ASSERT_LOG_ONLY(result == VDO_SUCCESS, "layout has expected partition: %u", id);
970
971         return partition;
972 }
973
/**
 * encode_layout() - Encode a layout into a buffer in its on-disk (3.0) format.
 * @buffer: The buffer to encode into.
 * @offset: The offset in the buffer at which to encode; updated to point past the encoding.
 * @layout: The layout to encode.
 */
static void encode_layout(u8 *buffer, size_t *offset, const struct layout *layout)
{
	const struct partition *partition;
	size_t initial_offset;
	struct header header = VDO_LAYOUT_HEADER_3_0;

	/* Partition ids are written as single bytes below, so the enum must fit in one. */
	BUILD_BUG_ON(sizeof(enum partition_id) != sizeof(u8));
	ASSERT_LOG_ONLY(layout->num_partitions <= U8_MAX,
			"layout partition count must fit in a byte");

	vdo_encode_header(buffer, offset, &header);

	/* Encode the fixed-size layout_3_0 header fields. */
	initial_offset = *offset;
	encode_u64_le(buffer, offset, layout->first_free);
	encode_u64_le(buffer, offset, layout->last_free);
	buffer[(*offset)++] = layout->num_partitions;

	ASSERT_LOG_ONLY(sizeof(struct layout_3_0) == *offset - initial_offset,
			"encoded size of a layout header must match structure");

	/* Each partition is encoded as: id (1 byte), offset, a legacy zero field, count. */
	for (partition = layout->head; partition != NULL; partition = partition->next) {
		buffer[(*offset)++] = partition->id;
		encode_u64_le(buffer, offset, partition->offset);
		/* This field only exists for backwards compatibility */
		encode_u64_le(buffer, offset, 0);
		encode_u64_le(buffer, offset, partition->count);
	}

	ASSERT_LOG_ONLY(header.size == *offset - initial_offset,
			"encoded size of a layout must match header size");
}
1005
/**
 * decode_layout() - Decode a layout from a buffer and validate its partitions.
 * @buffer: The buffer to decode from.
 * @offset: The offset in the buffer at which to decode; updated to point past the encoding.
 * @start: The physical block number at which the layout's data region begins.
 * @size: The expected extent of the layout; the decoded partitions must cover exactly this.
 * @layout: The layout to initialize from the decoded data.
 *
 * On failure after any partition has been allocated, the partially-built layout is cleaned up.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int decode_layout(u8 *buffer, size_t *offset, physical_block_number_t start,
			 block_count_t size, struct layout *layout)
{
	struct header header;
	struct layout_3_0 layout_header;
	struct partition *partition;
	size_t initial_offset;
	physical_block_number_t first_free, last_free;
	u8 partition_count;
	u8 i;
	int result;

	vdo_decode_header(buffer, offset, &header);
	/* Layout is variable size, so only do a minimum size check here. */
	result = vdo_validate_header(&VDO_LAYOUT_HEADER_3_0, &header, false, __func__);
	if (result != VDO_SUCCESS)
		return result;

	/* Decode the fixed-size layout_3_0 header fields. */
	initial_offset = *offset;
	decode_u64_le(buffer, offset, &first_free);
	decode_u64_le(buffer, offset, &last_free);
	partition_count = buffer[(*offset)++];
	layout_header = (struct layout_3_0) {
		.first_free = first_free,
		.last_free = last_free,
		.partition_count = partition_count,
	};

	result = ASSERT(sizeof(struct layout_3_0) == *offset - initial_offset,
			"decoded size of a layout header must match structure");
	if (result != VDO_SUCCESS)
		return result;

	layout->start = start;
	layout->size = size;
	layout->first_free = layout_header.first_free;
	layout->last_free = layout_header.last_free;
	layout->num_partitions = layout_header.partition_count;

	if (layout->num_partitions > VDO_PARTITION_COUNT) {
		return uds_log_error_strerror(VDO_UNKNOWN_PARTITION,
					      "layout has extra partitions");
	}

	for (i = 0; i < layout->num_partitions; i++) {
		u8 id;
		u64 partition_offset, count;

		id = buffer[(*offset)++];
		decode_u64_le(buffer, offset, &partition_offset);
		/* Skip the obsolete "base" field kept only for backwards compatibility. */
		*offset += sizeof(u64);
		decode_u64_le(buffer, offset, &count);

		result = allocate_partition(layout, id, partition_offset, count);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return result;
		}
	}

	/* Validate that the layout has all (and only) the required partitions */
	for (i = 0; i < VDO_PARTITION_COUNT; i++) {
		result = vdo_get_partition(layout, REQUIRED_PARTITIONS[i], &partition);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return uds_log_error_strerror(result,
						      "layout is missing required partition %u",
						      REQUIRED_PARTITIONS[i]);
		}

		/* Accumulate partition sizes to check full coverage below. */
		start += partition->count;
	}

	/* The required partitions must exactly cover the layout with no gaps or overlaps. */
	if (start != size) {
		vdo_uninitialize_layout(layout);
		return uds_log_error_strerror(UDS_BAD_STATE,
					      "partitions do not cover the layout");
	}

	return VDO_SUCCESS;
}
1087
1088 /**
1089  * pack_vdo_config() - Convert a vdo_config to its packed on-disk representation.
1090  * @config: The vdo config to convert.
1091  *
1092  * Return: The platform-independent representation of the config.
1093  */
1094 static struct packed_vdo_config pack_vdo_config(struct vdo_config config)
1095 {
1096         return (struct packed_vdo_config) {
1097                 .logical_blocks = __cpu_to_le64(config.logical_blocks),
1098                 .physical_blocks = __cpu_to_le64(config.physical_blocks),
1099                 .slab_size = __cpu_to_le64(config.slab_size),
1100                 .recovery_journal_size = __cpu_to_le64(config.recovery_journal_size),
1101                 .slab_journal_blocks = __cpu_to_le64(config.slab_journal_blocks),
1102         };
1103 }
1104
1105 /**
1106  * pack_vdo_component() - Convert a vdo_component to its packed on-disk representation.
1107  * @component: The VDO component data to convert.
1108  *
1109  * Return: The platform-independent representation of the component.
1110  */
1111 static struct packed_vdo_component_41_0 pack_vdo_component(const struct vdo_component component)
1112 {
1113         return (struct packed_vdo_component_41_0) {
1114                 .state = __cpu_to_le32(component.state),
1115                 .complete_recoveries = __cpu_to_le64(component.complete_recoveries),
1116                 .read_only_recoveries = __cpu_to_le64(component.read_only_recoveries),
1117                 .config = pack_vdo_config(component.config),
1118                 .nonce = __cpu_to_le64(component.nonce),
1119         };
1120 }
1121
/**
 * encode_vdo_component() - Encode the vdo component data into a buffer.
 * @buffer: The buffer to encode into.
 * @offset: The offset in the buffer at which to encode; updated to point past the encoding.
 * @component: The component data to encode.
 */
static void encode_vdo_component(u8 *buffer, size_t *offset,
				 struct vdo_component component)
{
	struct packed_vdo_component_41_0 packed;

	/* The version number precedes the packed component on disk. */
	encode_version_number(buffer, offset, VDO_COMPONENT_DATA_41_0);
	packed = pack_vdo_component(component);
	memcpy(buffer + *offset, &packed, sizeof(packed));
	*offset += sizeof(packed);
}
1132
1133 /**
1134  * unpack_vdo_config() - Convert a packed_vdo_config to its native in-memory representation.
1135  * @config: The packed vdo config to convert.
1136  *
1137  * Return: The native in-memory representation of the vdo config.
1138  */
1139 static struct vdo_config unpack_vdo_config(struct packed_vdo_config config)
1140 {
1141         return (struct vdo_config) {
1142                 .logical_blocks = __le64_to_cpu(config.logical_blocks),
1143                 .physical_blocks = __le64_to_cpu(config.physical_blocks),
1144                 .slab_size = __le64_to_cpu(config.slab_size),
1145                 .recovery_journal_size = __le64_to_cpu(config.recovery_journal_size),
1146                 .slab_journal_blocks = __le64_to_cpu(config.slab_journal_blocks),
1147         };
1148 }
1149
1150 /**
1151  * unpack_vdo_component_41_0() - Convert a packed_vdo_component_41_0 to its native in-memory
1152  *                               representation.
1153  * @component: The packed vdo component data to convert.
1154  *
1155  * Return: The native in-memory representation of the component.
1156  */
1157 static struct vdo_component unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)
1158 {
1159         return (struct vdo_component) {
1160                 .state = __le32_to_cpu(component.state),
1161                 .complete_recoveries = __le64_to_cpu(component.complete_recoveries),
1162                 .read_only_recoveries = __le64_to_cpu(component.read_only_recoveries),
1163                 .config = unpack_vdo_config(component.config),
1164                 .nonce = __le64_to_cpu(component.nonce),
1165         };
1166 }
1167
1168 /**
1169  * vdo_decode_component() - Decode the component data for the vdo itself out of the super block.
1170  *
1171  * Return: VDO_SUCCESS or an error.
1172  */
1173 static int decode_vdo_component(u8 *buffer, size_t *offset, struct vdo_component *component)
1174 {
1175         struct version_number version;
1176         struct packed_vdo_component_41_0 packed;
1177         int result;
1178
1179         decode_version_number(buffer, offset, &version);
1180         result = validate_version(version, VDO_COMPONENT_DATA_41_0,
1181                                   "VDO component data");
1182         if (result != VDO_SUCCESS)
1183                 return result;
1184
1185         memcpy(&packed, buffer + *offset, sizeof(packed));
1186         *offset += sizeof(packed);
1187         *component = unpack_vdo_component_41_0(packed);
1188         return VDO_SUCCESS;
1189 }
1190
1191 /**
1192  * vdo_validate_config() - Validate constraints on a VDO config.
1193  * @config: The VDO config.
1194  * @physical_block_count: The minimum block count of the underlying storage.
1195  * @logical_block_count: The expected logical size of the VDO, or 0 if the logical size may be
1196  *                       unspecified.
1197  *
1198  * Return: A success or error code.
1199  */
1200 int vdo_validate_config(const struct vdo_config *config,
1201                         block_count_t physical_block_count,
1202                         block_count_t logical_block_count)
1203 {
1204         struct slab_config slab_config;
1205         int result;
1206
1207         result = ASSERT(config->slab_size > 0, "slab size unspecified");
1208         if (result != UDS_SUCCESS)
1209                 return result;
1210
1211         result = ASSERT(is_power_of_2(config->slab_size),
1212                         "slab size must be a power of two");
1213         if (result != UDS_SUCCESS)
1214                 return result;
1215
1216         result = ASSERT(config->slab_size <= (1 << MAX_VDO_SLAB_BITS),
1217                         "slab size must be less than or equal to 2^%d",
1218                         MAX_VDO_SLAB_BITS);
1219         if (result != VDO_SUCCESS)
1220                 return result;
1221
1222         result = ASSERT(config->slab_journal_blocks >= MINIMUM_VDO_SLAB_JOURNAL_BLOCKS,
1223                         "slab journal size meets minimum size");
1224         if (result != UDS_SUCCESS)
1225                 return result;
1226
1227         result = ASSERT(config->slab_journal_blocks <= config->slab_size,
1228                         "slab journal size is within expected bound");
1229         if (result != UDS_SUCCESS)
1230                 return result;
1231
1232         result = vdo_configure_slab(config->slab_size, config->slab_journal_blocks,
1233                                     &slab_config);
1234         if (result != VDO_SUCCESS)
1235                 return result;
1236
1237         result = ASSERT((slab_config.data_blocks >= 1),
1238                         "slab must be able to hold at least one block");
1239         if (result != UDS_SUCCESS)
1240                 return result;
1241
1242         result = ASSERT(config->physical_blocks > 0, "physical blocks unspecified");
1243         if (result != UDS_SUCCESS)
1244                 return result;
1245
1246         result = ASSERT(config->physical_blocks <= MAXIMUM_VDO_PHYSICAL_BLOCKS,
1247                         "physical block count %llu exceeds maximum %llu",
1248                         (unsigned long long) config->physical_blocks,
1249                         (unsigned long long) MAXIMUM_VDO_PHYSICAL_BLOCKS);
1250         if (result != UDS_SUCCESS)
1251                 return VDO_OUT_OF_RANGE;
1252
1253         if (physical_block_count != config->physical_blocks) {
1254                 uds_log_error("A physical size of %llu blocks was specified, not the %llu blocks configured in the vdo super block",
1255                               (unsigned long long) physical_block_count,
1256                               (unsigned long long) config->physical_blocks);
1257                 return VDO_PARAMETER_MISMATCH;
1258         }
1259
1260         if (logical_block_count > 0) {
1261                 result = ASSERT((config->logical_blocks > 0),
1262                                 "logical blocks unspecified");
1263                 if (result != UDS_SUCCESS)
1264                         return result;
1265
1266                 if (logical_block_count != config->logical_blocks) {
1267                         uds_log_error("A logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
1268                                       (unsigned long long) logical_block_count,
1269                                       (unsigned long long) config->logical_blocks);
1270                         return VDO_PARAMETER_MISMATCH;
1271                 }
1272         }
1273
1274         result = ASSERT(config->logical_blocks <= MAXIMUM_VDO_LOGICAL_BLOCKS,
1275                         "logical blocks too large");
1276         if (result != UDS_SUCCESS)
1277                 return result;
1278
1279         result = ASSERT(config->recovery_journal_size > 0,
1280                         "recovery journal size unspecified");
1281         if (result != UDS_SUCCESS)
1282                 return result;
1283
1284         result = ASSERT(is_power_of_2(config->recovery_journal_size),
1285                         "recovery journal size must be a power of two");
1286         if (result != UDS_SUCCESS)
1287                 return result;
1288
1289         return result;
1290 }
1291
1292 /**
1293  * vdo_destroy_component_states() - Clean up any allocations in a vdo_component_states.
1294  * @states: The component states to destroy.
1295  */
1296 void vdo_destroy_component_states(struct vdo_component_states *states)
1297 {
1298         if (states == NULL)
1299                 return;
1300
1301         vdo_uninitialize_layout(&states->layout);
1302 }
1303
1304 /**
1305  * decode_components() - Decode the components now that we know the component data is a version we
1306  *                       understand.
1307  * @buffer: The buffer being decoded.
1308  * @offset: The offset to start decoding from.
1309  * @geometry: The vdo geometry
1310  * @states: An object to hold the successfully decoded state.
1311  *
1312  * Return: VDO_SUCCESS or an error.
1313  */
1314 static int __must_check decode_components(u8 *buffer, size_t *offset,
1315                                           struct volume_geometry *geometry,
1316                                           struct vdo_component_states *states)
1317 {
1318         int result;
1319
1320         decode_vdo_component(buffer, offset, &states->vdo);
1321
1322         result = decode_layout(buffer, offset, vdo_get_data_region_start(*geometry) + 1,
1323                                states->vdo.config.physical_blocks, &states->layout);
1324         if (result != VDO_SUCCESS)
1325                 return result;
1326
1327         result = decode_recovery_journal_state_7_0(buffer, offset,
1328                                                    &states->recovery_journal);
1329         if (result != VDO_SUCCESS)
1330                 return result;
1331
1332         result = decode_slab_depot_state_2_0(buffer, offset, &states->slab_depot);
1333         if (result != VDO_SUCCESS)
1334                 return result;
1335
1336         result = decode_block_map_state_2_0(buffer, offset, &states->block_map);
1337         if (result != VDO_SUCCESS)
1338                 return result;
1339
1340         ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
1341                         "All decoded component data was used");
1342         return VDO_SUCCESS;
1343 }
1344
1345 /**
1346  * vdo_decode_component_states() - Decode the payload of a super block.
1347  * @buffer: The buffer containing the encoded super block contents.
1348  * @geometry: The vdo geometry
1349  * @states: A pointer to hold the decoded states.
1350  *
1351  * Return: VDO_SUCCESS or an error.
1352  */
1353 int vdo_decode_component_states(u8 *buffer, struct volume_geometry *geometry,
1354                                 struct vdo_component_states *states)
1355 {
1356         int result;
1357         size_t offset = VDO_COMPONENT_DATA_OFFSET;
1358
1359         /* This is for backwards compatibility. */
1360         decode_u32_le(buffer, &offset, &states->unused);
1361
1362         /* Check the VDO volume version */
1363         decode_version_number(buffer, &offset, &states->volume_version);
1364         result = validate_version(VDO_VOLUME_VERSION_67_0, states->volume_version,
1365                                   "volume");
1366         if (result != VDO_SUCCESS)
1367                 return result;
1368
1369         result = decode_components(buffer, &offset, geometry, states);
1370         if (result != VDO_SUCCESS)
1371                 vdo_uninitialize_layout(&states->layout);
1372
1373         return result;
1374 }
1375
1376 /**
1377  * vdo_validate_component_states() - Validate the decoded super block configuration.
1378  * @states: The state decoded from the super block.
1379  * @geometry_nonce: The nonce from the geometry block.
1380  * @physical_size: The minimum block count of the underlying storage.
1381  * @logical_size: The expected logical size of the VDO, or 0 if the logical size may be
1382  *                unspecified.
1383  *
1384  * Return: VDO_SUCCESS or an error if the configuration is invalid.
1385  */
1386 int vdo_validate_component_states(struct vdo_component_states *states,
1387                                   nonce_t geometry_nonce, block_count_t physical_size,
1388                                   block_count_t logical_size)
1389 {
1390         if (geometry_nonce != states->vdo.nonce) {
1391                 return uds_log_error_strerror(VDO_BAD_NONCE,
1392                                               "Geometry nonce %llu does not match superblock nonce %llu",
1393                                               (unsigned long long) geometry_nonce,
1394                                               (unsigned long long) states->vdo.nonce);
1395         }
1396
1397         return vdo_validate_config(&states->vdo.config, physical_size, logical_size);
1398 }
1399
1400 /**
1401  * vdo_encode_component_states() - Encode the state of all vdo components in the super block.
1402  */
1403 static void vdo_encode_component_states(u8 *buffer, size_t *offset,
1404                                         const struct vdo_component_states *states)
1405 {
1406         /* This is for backwards compatibility. */
1407         encode_u32_le(buffer, offset, states->unused);
1408         encode_version_number(buffer, offset, states->volume_version);
1409         encode_vdo_component(buffer, offset, states->vdo);
1410         encode_layout(buffer, offset, &states->layout);
1411         encode_recovery_journal_state_7_0(buffer, offset, states->recovery_journal);
1412         encode_slab_depot_state_2_0(buffer, offset, states->slab_depot);
1413         encode_block_map_state_2_0(buffer, offset, states->block_map);
1414
1415         ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
1416                         "All super block component data was encoded");
1417 }
1418
1419 /**
1420  * vdo_encode_super_block() - Encode a super block into its on-disk representation.
1421  */
1422 void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states)
1423 {
1424         u32 checksum;
1425         struct header header = SUPER_BLOCK_HEADER_12_0;
1426         size_t offset = 0;
1427
1428         header.size += VDO_COMPONENT_DATA_SIZE;
1429         vdo_encode_header(buffer, &offset, &header);
1430         vdo_encode_component_states(buffer, &offset, states);
1431
1432         checksum = vdo_crc32(buffer, offset);
1433         encode_u32_le(buffer, &offset, checksum);
1434
1435         /*
1436          * Even though the buffer is a full block, to avoid the potential corruption from a torn
1437          * write, the entire encoding must fit in the first sector.
1438          */
1439         ASSERT_LOG_ONLY(offset <= VDO_SECTOR_SIZE,
1440                         "entire superblock must fit in one sector");
1441 }
1442
1443 /**
1444  * vdo_decode_super_block() - Decode a super block from its on-disk representation.
1445  */
1446 int vdo_decode_super_block(u8 *buffer)
1447 {
1448         struct header header;
1449         int result;
1450         u32 checksum, saved_checksum;
1451         size_t offset = 0;
1452
1453         /* Decode and validate the header. */
1454         vdo_decode_header(buffer, &offset, &header);
1455         result = vdo_validate_header(&SUPER_BLOCK_HEADER_12_0, &header, false, __func__);
1456         if (result != VDO_SUCCESS)
1457                 return result;
1458
1459         if (header.size > VDO_COMPONENT_DATA_SIZE + sizeof(u32)) {
1460                 /*
1461                  * We can't check release version or checksum until we know the content size, so we
1462                  * have to assume a version mismatch on unexpected values.
1463                  */
1464                 return uds_log_error_strerror(VDO_UNSUPPORTED_VERSION,
1465                                               "super block contents too large: %zu",
1466                                               header.size);
1467         }
1468
1469         /* Skip past the component data for now, to verify the checksum. */
1470         offset += VDO_COMPONENT_DATA_SIZE;
1471
1472         checksum = vdo_crc32(buffer, offset);
1473         decode_u32_le(buffer, &offset, &saved_checksum);
1474
1475         result = ASSERT(offset == VDO_SUPER_BLOCK_FIXED_SIZE + VDO_COMPONENT_DATA_SIZE,
1476                         "must have decoded entire superblock payload");
1477         if (result != VDO_SUCCESS)
1478                 return result;
1479
1480         return ((checksum != saved_checksum) ? VDO_CHECKSUM_MISMATCH : VDO_SUCCESS);
1481 }