btrfs: use list_for_each_entry* in check-integrity.c
[linux-2.6-block.git] / fs / btrfs / check-integrity.c
CommitLineData
5db02760
SB
/*
 * Copyright (C) STRATO AG 2011.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
18
/*
 * This module can be used to catch cases when the btrfs kernel
 * code executes write requests to the disk that bring the file
 * system into an inconsistent state. In such a state, a power-loss
 * or kernel panic event would cause the data on disk to be
 * lost or at least damaged.
 *
 * Code is added that examines all block write requests during
 * runtime (including writes of the super block). Three rules
 * are verified and an error is printed on violation of the
 * rules:
 * 1. It is not allowed to write a disk block which is
 *    currently referenced by the super block (either directly
 *    or indirectly).
 * 2. When a super block is written, it is verified that all
 *    referenced (directly or indirectly) blocks fulfill the
 *    following requirements:
 *    2a. All referenced blocks have either been present when
 *        the file system was mounted, (i.e., they have been
 *        referenced by the super block) or they have been
 *        written since then and the write completion callback
 *        was called and no write error was indicated and a
 *        FLUSH request to the device where these blocks are
 *        located was received and completed.
 *    2b. All referenced blocks need to have a generation
 *        number which is equal to the parent's number.
 *
 * One issue that was found using this module was that the log
 * tree on disk became temporarily corrupted because disk blocks
 * that had been in use for the log tree had been freed and
 * reused too early, while being referenced by the written super
 * block.
 *
 * The search term in the kernel log that can be used to filter
 * on the existence of detected integrity issues is
 * "btrfs: attempt".
 *
 * The integrity check is enabled via mount options. These
 * mount options are only supported if the integrity check
 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
 *
 * Example #1, apply integrity checks to all metadata:
 * mount /dev/sdb1 /mnt -o check_int
 *
 * Example #2, apply integrity checks to all metadata and
 * to data extents:
 * mount /dev/sdb1 /mnt -o check_int_data
 *
 * Example #3, apply integrity checks to all metadata and dump
 * the tree that the super block references to kernel messages
 * each time after a super block was written:
 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
 *
 * If the integrity check tool is included and activated in
 * the mount options, plenty of kernel memory is used, and
 * plenty of additional CPU cycles are spent. Enabling this
 * functionality is not intended for normal use. In most
 * cases, unless you are a btrfs developer who needs to verify
 * the integrity of (super)-block write requests, do not
 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
 * include and compile the integrity check tool.
 *
 * Expect millions of lines of information in the kernel log with an
 * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
 * kernel config to at least 26 (which is 64MB). Usually the value is
 * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
 * changed like this before LOG_BUF_SHIFT can be set to a high value:
 * config LOG_BUF_SHIFT
 *       int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
 *       range 12 30
 */
90
91#include <linux/sched.h>
92#include <linux/slab.h>
93#include <linux/buffer_head.h>
94#include <linux/mutex.h>
5db02760
SB
95#include <linux/genhd.h>
96#include <linux/blkdev.h>
8f608de6 97#include <linux/vmalloc.h>
5db02760
SB
98#include "ctree.h"
99#include "disk-io.h"
0b947aff 100#include "hash.h"
5db02760
SB
101#include "transaction.h"
102#include "extent_io.h"
5db02760
SB
103#include "volumes.h"
104#include "print-tree.h"
105#include "locking.h"
106#include "check-integrity.h"
606686ee 107#include "rcu-string.h"
5db02760
SB
108
/*
 * Bucket counts of the three hash tables.  The bucket index is derived
 * with "& (SIZE - 1)", so every size has to remain a power of two.
 */
#define BTRFSIC_BLOCK_HASHTABLE_SIZE		0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE	0x10000
#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE	0x100

/* Tags stored in the magic/magic_num members, for debugging only. */
#define BTRFSIC_BLOCK_MAGIC_NUMBER		0x14491051
#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER		0x11070807
#define BTRFSIC_DEV2STATE_MAGIC_NUMBER		0x20111530
#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER	20111300

/* Maximum indentation of a tree dump, in characters, excluding " [...]". */
#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL	(200 - 6)

/* Generation value used as long as the real generation is not known. */
#define BTRFSIC_GENERATION_UNKNOWN		((u64)-1)
119
/*
 * Bit definitions for the print_mask.
 * The mask is passed in with the mount option check_int_print_mask;
 * the bits can be OR-ed together (e.g. 263 == 0x107 selects
 * SUPERBLOCK_WRITE, ROOT_CHUNK_LOG_TREE_LOCATION, TREE_AFTER_SB_WRITE
 * and INITIAL_TREE).
 */
#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE			0x00000001
#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION		0x00000002
#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE			0x00000004
#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE			0x00000008
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH			0x00000010
#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH			0x00000020
#define BTRFSIC_PRINT_MASK_VERBOSE				0x00000040
#define BTRFSIC_PRINT_MASK_VERY_VERBOSE				0x00000080
#define BTRFSIC_PRINT_MASK_INITIAL_TREE				0x00000100
#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES			0x00000200
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE			0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES				0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS		0x00001000
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE		0x00002000
5db02760
SB
138
139struct btrfsic_dev_state;
140struct btrfsic_state;
141
142struct btrfsic_block {
143 u32 magic_num; /* only used for debug purposes */
144 unsigned int is_metadata:1; /* if it is meta-data, not data-data */
145 unsigned int is_superblock:1; /* if it is one of the superblocks */
146 unsigned int is_iodone:1; /* if is done by lower subsystem */
147 unsigned int iodone_w_error:1; /* error was indicated to endio */
148 unsigned int never_written:1; /* block was added because it was
149 * referenced, not because it was
150 * written */
cb3806ec 151 unsigned int mirror_num; /* large enough to hold
5db02760
SB
152 * BTRFS_SUPER_MIRROR_MAX */
153 struct btrfsic_dev_state *dev_state;
154 u64 dev_bytenr; /* key, physical byte num on disk */
155 u64 logical_bytenr; /* logical byte num on disk */
156 u64 generation;
157 struct btrfs_disk_key disk_key; /* extra info to print in case of
158 * issues, will not always be correct */
159 struct list_head collision_resolving_node; /* list node */
160 struct list_head all_blocks_node; /* list node */
161
162 /* the following two lists contain block_link items */
163 struct list_head ref_to_list; /* list */
164 struct list_head ref_from_list; /* list */
165 struct btrfsic_block *next_in_same_bio;
166 void *orig_bio_bh_private;
167 union {
168 bio_end_io_t *bio;
169 bh_end_io_t *bh;
170 } orig_bio_bh_end_io;
171 int submit_bio_bh_rw;
172 u64 flush_gen; /* only valid if !never_written */
173};
174
175/*
176 * Elements of this type are allocated dynamically and required because
177 * each block object can refer to and can be ref from multiple blocks.
178 * The key to lookup them in the hashtable is the dev_bytenr of
179 * the block ref to plus the one from the block refered from.
180 * The fact that they are searchable via a hashtable and that a
181 * ref_cnt is maintained is not required for the btrfs integrity
182 * check algorithm itself, it is only used to make the output more
183 * beautiful in case that an error is detected (an error is defined
184 * as a write operation to a block while that block is still referenced).
185 */
186struct btrfsic_block_link {
187 u32 magic_num; /* only used for debug purposes */
188 u32 ref_cnt;
189 struct list_head node_ref_to; /* list node */
190 struct list_head node_ref_from; /* list node */
191 struct list_head collision_resolving_node; /* list node */
192 struct btrfsic_block *block_ref_to;
193 struct btrfsic_block *block_ref_from;
194 u64 parent_generation;
195};
196
197struct btrfsic_dev_state {
198 u32 magic_num; /* only used for debug purposes */
199 struct block_device *bdev;
200 struct btrfsic_state *state;
201 struct list_head collision_resolving_node; /* list node */
202 struct btrfsic_block dummy_block_for_bio_bh_flush;
203 u64 last_flush_gen;
204 char name[BDEVNAME_SIZE];
205};
206
207struct btrfsic_block_hashtable {
208 struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
209};
210
211struct btrfsic_block_link_hashtable {
212 struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
213};
214
215struct btrfsic_dev_state_hashtable {
216 struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
217};
218
219struct btrfsic_block_data_ctx {
220 u64 start; /* virtual bytenr */
221 u64 dev_bytenr; /* physical bytenr on device */
222 u32 len;
223 struct btrfsic_dev_state *dev;
e06baab4
SB
224 char **datav;
225 struct page **pagev;
226 void *mem_to_free;
5db02760
SB
227};
228
229/* This structure is used to implement recursion without occupying
230 * any stack space, refer to btrfsic_process_metablock() */
231struct btrfsic_stack_frame {
232 u32 magic;
233 u32 nr;
234 int error;
235 int i;
236 int limit_nesting;
237 int num_copies;
238 int mirror_num;
239 struct btrfsic_block *block;
240 struct btrfsic_block_data_ctx *block_ctx;
241 struct btrfsic_block *next_block;
242 struct btrfsic_block_data_ctx next_block_ctx;
243 struct btrfs_header *hdr;
244 struct btrfsic_stack_frame *prev;
245};
246
247/* Some state per mounted filesystem */
248struct btrfsic_state {
249 u32 print_mask;
250 int include_extent_data;
251 int csum_size;
252 struct list_head all_blocks_list;
253 struct btrfsic_block_hashtable block_hashtable;
254 struct btrfsic_block_link_hashtable block_link_hashtable;
255 struct btrfs_root *root;
256 u64 max_superblock_generation;
257 struct btrfsic_block *latest_superblock;
e06baab4
SB
258 u32 metablock_size;
259 u32 datablock_size;
5db02760
SB
260};
261
262static void btrfsic_block_init(struct btrfsic_block *b);
263static struct btrfsic_block *btrfsic_block_alloc(void);
264static void btrfsic_block_free(struct btrfsic_block *b);
265static void btrfsic_block_link_init(struct btrfsic_block_link *n);
266static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
267static void btrfsic_block_link_free(struct btrfsic_block_link *n);
268static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
269static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
270static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
271static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
272static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
273 struct btrfsic_block_hashtable *h);
274static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
275static struct btrfsic_block *btrfsic_block_hashtable_lookup(
276 struct block_device *bdev,
277 u64 dev_bytenr,
278 struct btrfsic_block_hashtable *h);
279static void btrfsic_block_link_hashtable_init(
280 struct btrfsic_block_link_hashtable *h);
281static void btrfsic_block_link_hashtable_add(
282 struct btrfsic_block_link *l,
283 struct btrfsic_block_link_hashtable *h);
284static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
285static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
286 struct block_device *bdev_ref_to,
287 u64 dev_bytenr_ref_to,
288 struct block_device *bdev_ref_from,
289 u64 dev_bytenr_ref_from,
290 struct btrfsic_block_link_hashtable *h);
291static void btrfsic_dev_state_hashtable_init(
292 struct btrfsic_dev_state_hashtable *h);
293static void btrfsic_dev_state_hashtable_add(
294 struct btrfsic_dev_state *ds,
295 struct btrfsic_dev_state_hashtable *h);
296static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
297static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
298 struct block_device *bdev,
299 struct btrfsic_dev_state_hashtable *h);
300static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
301static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
302static int btrfsic_process_superblock(struct btrfsic_state *state,
303 struct btrfs_fs_devices *fs_devices);
304static int btrfsic_process_metablock(struct btrfsic_state *state,
305 struct btrfsic_block *block,
306 struct btrfsic_block_data_ctx *block_ctx,
5db02760 307 int limit_nesting, int force_iodone_flag);
e06baab4
SB
308static void btrfsic_read_from_block_data(
309 struct btrfsic_block_data_ctx *block_ctx,
310 void *dst, u32 offset, size_t len);
5db02760
SB
311static int btrfsic_create_link_to_next_block(
312 struct btrfsic_state *state,
313 struct btrfsic_block *block,
314 struct btrfsic_block_data_ctx
315 *block_ctx, u64 next_bytenr,
316 int limit_nesting,
317 struct btrfsic_block_data_ctx *next_block_ctx,
318 struct btrfsic_block **next_blockp,
319 int force_iodone_flag,
320 int *num_copiesp, int *mirror_nump,
321 struct btrfs_disk_key *disk_key,
322 u64 parent_generation);
323static int btrfsic_handle_extent_data(struct btrfsic_state *state,
324 struct btrfsic_block *block,
325 struct btrfsic_block_data_ctx *block_ctx,
326 u32 item_offset, int force_iodone_flag);
327static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
328 struct btrfsic_block_data_ctx *block_ctx_out,
329 int mirror_num);
5db02760
SB
330static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
331static int btrfsic_read_block(struct btrfsic_state *state,
332 struct btrfsic_block_data_ctx *block_ctx);
333static void btrfsic_dump_database(struct btrfsic_state *state);
334static int btrfsic_test_for_metadata(struct btrfsic_state *state,
e06baab4 335 char **datav, unsigned int num_pages);
5db02760 336static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
e06baab4
SB
337 u64 dev_bytenr, char **mapped_datav,
338 unsigned int num_pages,
339 struct bio *bio, int *bio_is_patched,
5db02760
SB
340 struct buffer_head *bh,
341 int submit_bio_bh_rw);
342static int btrfsic_process_written_superblock(
343 struct btrfsic_state *state,
344 struct btrfsic_block *const block,
345 struct btrfs_super_block *const super_hdr);
4246a0b6 346static void btrfsic_bio_end_io(struct bio *bp);
5db02760
SB
347static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
348static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
349 const struct btrfsic_block *block,
350 int recursion_level);
351static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
352 struct btrfsic_block *const block,
353 int recursion_level);
354static void btrfsic_print_add_link(const struct btrfsic_state *state,
355 const struct btrfsic_block_link *l);
356static void btrfsic_print_rem_link(const struct btrfsic_state *state,
357 const struct btrfsic_block_link *l);
358static char btrfsic_get_block_type(const struct btrfsic_state *state,
359 const struct btrfsic_block *block);
360static void btrfsic_dump_tree(const struct btrfsic_state *state);
361static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
362 const struct btrfsic_block *block,
363 int indent_level);
364static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
365 struct btrfsic_state *state,
366 struct btrfsic_block_data_ctx *next_block_ctx,
367 struct btrfsic_block *next_block,
368 struct btrfsic_block *from_block,
369 u64 parent_generation);
370static struct btrfsic_block *btrfsic_block_lookup_or_add(
371 struct btrfsic_state *state,
372 struct btrfsic_block_data_ctx *block_ctx,
373 const char *additional_string,
374 int is_metadata,
375 int is_iodone,
376 int never_written,
377 int mirror_num,
378 int *was_created);
379static int btrfsic_process_superblock_dev_mirror(
380 struct btrfsic_state *state,
381 struct btrfsic_dev_state *dev_state,
382 struct btrfs_device *device,
383 int superblock_mirror_num,
384 struct btrfsic_dev_state **selected_dev_state,
385 struct btrfs_super_block *selected_super);
386static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
387 struct block_device *bdev);
388static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
389 u64 bytenr,
390 struct btrfsic_dev_state *dev_state,
e06baab4 391 u64 dev_bytenr);
5db02760
SB
392
393static struct mutex btrfsic_mutex;
394static int btrfsic_is_initialized;
395static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
396
397
398static void btrfsic_block_init(struct btrfsic_block *b)
399{
400 b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
401 b->dev_state = NULL;
402 b->dev_bytenr = 0;
403 b->logical_bytenr = 0;
404 b->generation = BTRFSIC_GENERATION_UNKNOWN;
405 b->disk_key.objectid = 0;
406 b->disk_key.type = 0;
407 b->disk_key.offset = 0;
408 b->is_metadata = 0;
409 b->is_superblock = 0;
410 b->is_iodone = 0;
411 b->iodone_w_error = 0;
412 b->never_written = 0;
413 b->mirror_num = 0;
414 b->next_in_same_bio = NULL;
415 b->orig_bio_bh_private = NULL;
416 b->orig_bio_bh_end_io.bio = NULL;
417 INIT_LIST_HEAD(&b->collision_resolving_node);
418 INIT_LIST_HEAD(&b->all_blocks_node);
419 INIT_LIST_HEAD(&b->ref_to_list);
420 INIT_LIST_HEAD(&b->ref_from_list);
421 b->submit_bio_bh_rw = 0;
422 b->flush_gen = 0;
423}
424
425static struct btrfsic_block *btrfsic_block_alloc(void)
426{
427 struct btrfsic_block *b;
428
429 b = kzalloc(sizeof(*b), GFP_NOFS);
430 if (NULL != b)
431 btrfsic_block_init(b);
432
433 return b;
434}
435
436static void btrfsic_block_free(struct btrfsic_block *b)
437{
438 BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
439 kfree(b);
440}
441
442static void btrfsic_block_link_init(struct btrfsic_block_link *l)
443{
444 l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
445 l->ref_cnt = 1;
446 INIT_LIST_HEAD(&l->node_ref_to);
447 INIT_LIST_HEAD(&l->node_ref_from);
448 INIT_LIST_HEAD(&l->collision_resolving_node);
449 l->block_ref_to = NULL;
450 l->block_ref_from = NULL;
451}
452
453static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
454{
455 struct btrfsic_block_link *l;
456
457 l = kzalloc(sizeof(*l), GFP_NOFS);
458 if (NULL != l)
459 btrfsic_block_link_init(l);
460
461 return l;
462}
463
464static void btrfsic_block_link_free(struct btrfsic_block_link *l)
465{
466 BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
467 kfree(l);
468}
469
470static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
471{
472 ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
473 ds->bdev = NULL;
474 ds->state = NULL;
475 ds->name[0] = '\0';
476 INIT_LIST_HEAD(&ds->collision_resolving_node);
477 ds->last_flush_gen = 0;
478 btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
479 ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
480 ds->dummy_block_for_bio_bh_flush.dev_state = ds;
481}
482
483static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
484{
485 struct btrfsic_dev_state *ds;
486
487 ds = kzalloc(sizeof(*ds), GFP_NOFS);
488 if (NULL != ds)
489 btrfsic_dev_state_init(ds);
490
491 return ds;
492}
493
494static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
495{
496 BUG_ON(!(NULL == ds ||
497 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
498 kfree(ds);
499}
500
501static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
502{
503 int i;
504
505 for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
506 INIT_LIST_HEAD(h->table + i);
507}
508
509static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
510 struct btrfsic_block_hashtable *h)
511{
512 const unsigned int hashval =
513 (((unsigned int)(b->dev_bytenr >> 16)) ^
514 ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
515 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
516
517 list_add(&b->collision_resolving_node, h->table + hashval);
518}
519
520static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
521{
522 list_del(&b->collision_resolving_node);
523}
524
525static struct btrfsic_block *btrfsic_block_hashtable_lookup(
526 struct block_device *bdev,
527 u64 dev_bytenr,
528 struct btrfsic_block_hashtable *h)
529{
530 const unsigned int hashval =
531 (((unsigned int)(dev_bytenr >> 16)) ^
532 ((unsigned int)((uintptr_t)bdev))) &
533 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
b69f2bef 534 struct btrfsic_block *b;
5db02760 535
b69f2bef 536 list_for_each_entry(b, h->table + hashval, collision_resolving_node) {
5db02760
SB
537 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
538 return b;
539 }
540
541 return NULL;
542}
543
544static void btrfsic_block_link_hashtable_init(
545 struct btrfsic_block_link_hashtable *h)
546{
547 int i;
548
549 for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
550 INIT_LIST_HEAD(h->table + i);
551}
552
553static void btrfsic_block_link_hashtable_add(
554 struct btrfsic_block_link *l,
555 struct btrfsic_block_link_hashtable *h)
556{
557 const unsigned int hashval =
558 (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
559 ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
560 ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
561 ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
562 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
563
564 BUG_ON(NULL == l->block_ref_to);
565 BUG_ON(NULL == l->block_ref_from);
566 list_add(&l->collision_resolving_node, h->table + hashval);
567}
568
569static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
570{
571 list_del(&l->collision_resolving_node);
572}
573
574static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
575 struct block_device *bdev_ref_to,
576 u64 dev_bytenr_ref_to,
577 struct block_device *bdev_ref_from,
578 u64 dev_bytenr_ref_from,
579 struct btrfsic_block_link_hashtable *h)
580{
581 const unsigned int hashval =
582 (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
583 ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
584 ((unsigned int)((uintptr_t)bdev_ref_to)) ^
585 ((unsigned int)((uintptr_t)bdev_ref_from))) &
586 (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
b69f2bef 587 struct btrfsic_block_link *l;
5db02760 588
b69f2bef 589 list_for_each_entry(l, h->table + hashval, collision_resolving_node) {
5db02760
SB
590 BUG_ON(NULL == l->block_ref_to);
591 BUG_ON(NULL == l->block_ref_from);
592 if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
593 l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
594 l->block_ref_from->dev_state->bdev == bdev_ref_from &&
595 l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
596 return l;
597 }
598
599 return NULL;
600}
601
602static void btrfsic_dev_state_hashtable_init(
603 struct btrfsic_dev_state_hashtable *h)
604{
605 int i;
606
607 for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
608 INIT_LIST_HEAD(h->table + i);
609}
610
611static void btrfsic_dev_state_hashtable_add(
612 struct btrfsic_dev_state *ds,
613 struct btrfsic_dev_state_hashtable *h)
614{
615 const unsigned int hashval =
616 (((unsigned int)((uintptr_t)ds->bdev)) &
617 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
618
619 list_add(&ds->collision_resolving_node, h->table + hashval);
620}
621
622static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
623{
624 list_del(&ds->collision_resolving_node);
625}
626
627static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
628 struct block_device *bdev,
629 struct btrfsic_dev_state_hashtable *h)
630{
631 const unsigned int hashval =
632 (((unsigned int)((uintptr_t)bdev)) &
633 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
b69f2bef 634 struct btrfsic_dev_state *ds;
5db02760 635
b69f2bef 636 list_for_each_entry(ds, h->table + hashval, collision_resolving_node) {
5db02760
SB
637 if (ds->bdev == bdev)
638 return ds;
639 }
640
641 return NULL;
642}
643
644static int btrfsic_process_superblock(struct btrfsic_state *state,
645 struct btrfs_fs_devices *fs_devices)
646{
e77266e4 647 int ret = 0;
5db02760
SB
648 struct btrfs_super_block *selected_super;
649 struct list_head *dev_head = &fs_devices->devices;
650 struct btrfs_device *device;
651 struct btrfsic_dev_state *selected_dev_state = NULL;
652 int pass;
653
654 BUG_ON(NULL == state);
e06baab4 655 selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
5db02760
SB
656 if (NULL == selected_super) {
657 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
0b8d8ce0 658 return -ENOMEM;
5db02760
SB
659 }
660
661 list_for_each_entry(device, dev_head, dev_list) {
662 int i;
663 struct btrfsic_dev_state *dev_state;
664
665 if (!device->bdev || !device->name)
666 continue;
667
668 dev_state = btrfsic_dev_state_lookup(device->bdev);
669 BUG_ON(NULL == dev_state);
670 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
671 ret = btrfsic_process_superblock_dev_mirror(
672 state, dev_state, device, i,
673 &selected_dev_state, selected_super);
674 if (0 != ret && 0 == i) {
675 kfree(selected_super);
676 return ret;
677 }
678 }
679 }
680
681 if (NULL == state->latest_superblock) {
682 printk(KERN_INFO "btrfsic: no superblock found!\n");
683 kfree(selected_super);
684 return -1;
685 }
686
687 state->csum_size = btrfs_super_csum_size(selected_super);
688
689 for (pass = 0; pass < 3; pass++) {
690 int num_copies;
691 int mirror_num;
692 u64 next_bytenr;
693
694 switch (pass) {
695 case 0:
696 next_bytenr = btrfs_super_root(selected_super);
697 if (state->print_mask &
698 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
c1c9ff7c 699 printk(KERN_INFO "root@%llu\n", next_bytenr);
5db02760
SB
700 break;
701 case 1:
702 next_bytenr = btrfs_super_chunk_root(selected_super);
703 if (state->print_mask &
704 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
c1c9ff7c 705 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
5db02760
SB
706 break;
707 case 2:
708 next_bytenr = btrfs_super_log_root(selected_super);
709 if (0 == next_bytenr)
710 continue;
711 if (state->print_mask &
712 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
c1c9ff7c 713 printk(KERN_INFO "log@%llu\n", next_bytenr);
5db02760
SB
714 break;
715 }
716
717 num_copies =
5d964051 718 btrfs_num_copies(state->root->fs_info,
e06baab4 719 next_bytenr, state->metablock_size);
5db02760
SB
720 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
721 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
c1c9ff7c 722 next_bytenr, num_copies);
5db02760
SB
723
724 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
725 struct btrfsic_block *next_block;
726 struct btrfsic_block_data_ctx tmp_next_block_ctx;
727 struct btrfsic_block_link *l;
5db02760 728
e06baab4
SB
729 ret = btrfsic_map_block(state, next_bytenr,
730 state->metablock_size,
5db02760
SB
731 &tmp_next_block_ctx,
732 mirror_num);
733 if (ret) {
734 printk(KERN_INFO "btrfsic:"
735 " btrfsic_map_block(root @%llu,"
736 " mirror %d) failed!\n",
c1c9ff7c 737 next_bytenr, mirror_num);
5db02760
SB
738 kfree(selected_super);
739 return -1;
740 }
741
742 next_block = btrfsic_block_hashtable_lookup(
743 tmp_next_block_ctx.dev->bdev,
744 tmp_next_block_ctx.dev_bytenr,
745 &state->block_hashtable);
746 BUG_ON(NULL == next_block);
747
748 l = btrfsic_block_link_hashtable_lookup(
749 tmp_next_block_ctx.dev->bdev,
750 tmp_next_block_ctx.dev_bytenr,
751 state->latest_superblock->dev_state->
752 bdev,
753 state->latest_superblock->dev_bytenr,
754 &state->block_link_hashtable);
755 BUG_ON(NULL == l);
756
757 ret = btrfsic_read_block(state, &tmp_next_block_ctx);
e06baab4 758 if (ret < (int)PAGE_CACHE_SIZE) {
5db02760
SB
759 printk(KERN_INFO
760 "btrfsic: read @logical %llu failed!\n",
5db02760
SB
761 tmp_next_block_ctx.start);
762 btrfsic_release_block_ctx(&tmp_next_block_ctx);
763 kfree(selected_super);
764 return -1;
765 }
766
5db02760
SB
767 ret = btrfsic_process_metablock(state,
768 next_block,
769 &tmp_next_block_ctx,
5db02760
SB
770 BTRFS_MAX_LEVEL + 3, 1);
771 btrfsic_release_block_ctx(&tmp_next_block_ctx);
772 }
773 }
774
775 kfree(selected_super);
776 return ret;
777}
778
779static int btrfsic_process_superblock_dev_mirror(
780 struct btrfsic_state *state,
781 struct btrfsic_dev_state *dev_state,
782 struct btrfs_device *device,
783 int superblock_mirror_num,
784 struct btrfsic_dev_state **selected_dev_state,
785 struct btrfs_super_block *selected_super)
786{
787 struct btrfs_super_block *super_tmp;
788 u64 dev_bytenr;
789 struct buffer_head *bh;
790 struct btrfsic_block *superblock_tmp;
791 int pass;
792 struct block_device *const superblock_bdev = device->bdev;
793
794 /* super block bytenr is always the unmapped device bytenr */
795 dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
935e5cc9 796 if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes)
e06baab4
SB
797 return -1;
798 bh = __bread(superblock_bdev, dev_bytenr / 4096,
799 BTRFS_SUPER_INFO_SIZE);
5db02760
SB
800 if (NULL == bh)
801 return -1;
802 super_tmp = (struct btrfs_super_block *)
803 (bh->b_data + (dev_bytenr & 4095));
804
805 if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
3cae210f 806 btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
e06baab4
SB
807 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
808 btrfs_super_nodesize(super_tmp) != state->metablock_size ||
e06baab4 809 btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
5db02760
SB
810 brelse(bh);
811 return 0;
812 }
813
814 superblock_tmp =
815 btrfsic_block_hashtable_lookup(superblock_bdev,
816 dev_bytenr,
817 &state->block_hashtable);
818 if (NULL == superblock_tmp) {
819 superblock_tmp = btrfsic_block_alloc();
820 if (NULL == superblock_tmp) {
821 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
822 brelse(bh);
823 return -1;
824 }
825 /* for superblock, only the dev_bytenr makes sense */
826 superblock_tmp->dev_bytenr = dev_bytenr;
827 superblock_tmp->dev_state = dev_state;
828 superblock_tmp->logical_bytenr = dev_bytenr;
829 superblock_tmp->generation = btrfs_super_generation(super_tmp);
830 superblock_tmp->is_metadata = 1;
831 superblock_tmp->is_superblock = 1;
832 superblock_tmp->is_iodone = 1;
833 superblock_tmp->never_written = 0;
834 superblock_tmp->mirror_num = 1 + superblock_mirror_num;
835 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
ecaeb14b
DS
836 btrfs_info_in_rcu(device->dev_root->fs_info,
837 "new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)",
606686ee 838 superblock_bdev,
c1c9ff7c
GU
839 rcu_str_deref(device->name), dev_bytenr,
840 dev_state->name, dev_bytenr,
606686ee 841 superblock_mirror_num);
5db02760
SB
842 list_add(&superblock_tmp->all_blocks_node,
843 &state->all_blocks_list);
844 btrfsic_block_hashtable_add(superblock_tmp,
845 &state->block_hashtable);
846 }
847
848 /* select the one with the highest generation field */
849 if (btrfs_super_generation(super_tmp) >
850 state->max_superblock_generation ||
851 0 == state->max_superblock_generation) {
852 memcpy(selected_super, super_tmp, sizeof(*selected_super));
853 *selected_dev_state = dev_state;
854 state->max_superblock_generation =
855 btrfs_super_generation(super_tmp);
856 state->latest_superblock = superblock_tmp;
857 }
858
859 for (pass = 0; pass < 3; pass++) {
860 u64 next_bytenr;
861 int num_copies;
862 int mirror_num;
863 const char *additional_string = NULL;
864 struct btrfs_disk_key tmp_disk_key;
865
866 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
867 tmp_disk_key.offset = 0;
868 switch (pass) {
869 case 0:
3cae210f
QW
870 btrfs_set_disk_key_objectid(&tmp_disk_key,
871 BTRFS_ROOT_TREE_OBJECTID);
5db02760
SB
872 additional_string = "initial root ";
873 next_bytenr = btrfs_super_root(super_tmp);
874 break;
875 case 1:
3cae210f
QW
876 btrfs_set_disk_key_objectid(&tmp_disk_key,
877 BTRFS_CHUNK_TREE_OBJECTID);
5db02760
SB
878 additional_string = "initial chunk ";
879 next_bytenr = btrfs_super_chunk_root(super_tmp);
880 break;
881 case 2:
3cae210f
QW
882 btrfs_set_disk_key_objectid(&tmp_disk_key,
883 BTRFS_TREE_LOG_OBJECTID);
5db02760
SB
884 additional_string = "initial log ";
885 next_bytenr = btrfs_super_log_root(super_tmp);
886 if (0 == next_bytenr)
887 continue;
888 break;
889 }
890
891 num_copies =
5d964051 892 btrfs_num_copies(state->root->fs_info,
e06baab4 893 next_bytenr, state->metablock_size);
5db02760
SB
894 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
895 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
c1c9ff7c 896 next_bytenr, num_copies);
5db02760
SB
897 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
898 struct btrfsic_block *next_block;
899 struct btrfsic_block_data_ctx tmp_next_block_ctx;
900 struct btrfsic_block_link *l;
901
e06baab4
SB
902 if (btrfsic_map_block(state, next_bytenr,
903 state->metablock_size,
5db02760
SB
904 &tmp_next_block_ctx,
905 mirror_num)) {
906 printk(KERN_INFO "btrfsic: btrfsic_map_block("
907 "bytenr @%llu, mirror %d) failed!\n",
c1c9ff7c 908 next_bytenr, mirror_num);
5db02760
SB
909 brelse(bh);
910 return -1;
911 }
912
913 next_block = btrfsic_block_lookup_or_add(
914 state, &tmp_next_block_ctx,
915 additional_string, 1, 1, 0,
916 mirror_num, NULL);
917 if (NULL == next_block) {
918 btrfsic_release_block_ctx(&tmp_next_block_ctx);
919 brelse(bh);
920 return -1;
921 }
922
923 next_block->disk_key = tmp_disk_key;
924 next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
925 l = btrfsic_block_link_lookup_or_add(
926 state, &tmp_next_block_ctx,
927 next_block, superblock_tmp,
928 BTRFSIC_GENERATION_UNKNOWN);
929 btrfsic_release_block_ctx(&tmp_next_block_ctx);
930 if (NULL == l) {
931 brelse(bh);
932 return -1;
933 }
934 }
935 }
936 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
937 btrfsic_dump_tree_sub(state, superblock_tmp, 0);
938
939 brelse(bh);
940 return 0;
941}
942
943static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
944{
945 struct btrfsic_stack_frame *sf;
946
947 sf = kzalloc(sizeof(*sf), GFP_NOFS);
948 if (NULL == sf)
949 printk(KERN_INFO "btrfsic: alloc memory failed!\n");
950 else
951 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
952 return sf;
953}
954
955static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
956{
957 BUG_ON(!(NULL == sf ||
958 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
959 kfree(sf);
960}
961
962static int btrfsic_process_metablock(
963 struct btrfsic_state *state,
964 struct btrfsic_block *const first_block,
965 struct btrfsic_block_data_ctx *const first_block_ctx,
5db02760
SB
966 int first_limit_nesting, int force_iodone_flag)
967{
968 struct btrfsic_stack_frame initial_stack_frame = { 0 };
969 struct btrfsic_stack_frame *sf;
970 struct btrfsic_stack_frame *next_stack;
e06baab4
SB
971 struct btrfs_header *const first_hdr =
972 (struct btrfs_header *)first_block_ctx->datav[0];
5db02760 973
e06baab4 974 BUG_ON(!first_hdr);
5db02760
SB
975 sf = &initial_stack_frame;
976 sf->error = 0;
977 sf->i = -1;
978 sf->limit_nesting = first_limit_nesting;
979 sf->block = first_block;
980 sf->block_ctx = first_block_ctx;
981 sf->next_block = NULL;
982 sf->hdr = first_hdr;
983 sf->prev = NULL;
984
985continue_with_new_stack_frame:
986 sf->block->generation = le64_to_cpu(sf->hdr->generation);
987 if (0 == sf->hdr->level) {
988 struct btrfs_leaf *const leafhdr =
989 (struct btrfs_leaf *)sf->hdr;
990
991 if (-1 == sf->i) {
3cae210f 992 sf->nr = btrfs_stack_header_nritems(&leafhdr->header);
5db02760
SB
993
994 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
995 printk(KERN_INFO
996 "leaf %llu items %d generation %llu"
997 " owner %llu\n",
c1c9ff7c 998 sf->block_ctx->start, sf->nr,
3cae210f
QW
999 btrfs_stack_header_generation(
1000 &leafhdr->header),
3cae210f
QW
1001 btrfs_stack_header_owner(
1002 &leafhdr->header));
5db02760
SB
1003 }
1004
1005continue_with_current_leaf_stack_frame:
1006 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1007 sf->i++;
1008 sf->num_copies = 0;
1009 }
1010
1011 if (sf->i < sf->nr) {
e06baab4
SB
1012 struct btrfs_item disk_item;
1013 u32 disk_item_offset =
1014 (uintptr_t)(leafhdr->items + sf->i) -
1015 (uintptr_t)leafhdr;
1016 struct btrfs_disk_key *disk_key;
5db02760 1017 u8 type;
e06baab4 1018 u32 item_offset;
8ea05e3a 1019 u32 item_size;
5db02760 1020
e06baab4
SB
1021 if (disk_item_offset + sizeof(struct btrfs_item) >
1022 sf->block_ctx->len) {
1023leaf_item_out_of_bounce_error:
1024 printk(KERN_INFO
1025 "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1026 sf->block_ctx->start,
1027 sf->block_ctx->dev->name);
1028 goto one_stack_frame_backwards;
1029 }
1030 btrfsic_read_from_block_data(sf->block_ctx,
1031 &disk_item,
1032 disk_item_offset,
1033 sizeof(struct btrfs_item));
3cae210f 1034 item_offset = btrfs_stack_item_offset(&disk_item);
a5f519c9 1035 item_size = btrfs_stack_item_size(&disk_item);
e06baab4 1036 disk_key = &disk_item.key;
3cae210f 1037 type = btrfs_disk_key_type(disk_key);
5db02760
SB
1038
1039 if (BTRFS_ROOT_ITEM_KEY == type) {
e06baab4
SB
1040 struct btrfs_root_item root_item;
1041 u32 root_item_offset;
1042 u64 next_bytenr;
1043
1044 root_item_offset = item_offset +
1045 offsetof(struct btrfs_leaf, items);
8ea05e3a 1046 if (root_item_offset + item_size >
e06baab4
SB
1047 sf->block_ctx->len)
1048 goto leaf_item_out_of_bounce_error;
1049 btrfsic_read_from_block_data(
1050 sf->block_ctx, &root_item,
1051 root_item_offset,
8ea05e3a 1052 item_size);
3cae210f 1053 next_bytenr = btrfs_root_bytenr(&root_item);
5db02760
SB
1054
1055 sf->error =
1056 btrfsic_create_link_to_next_block(
1057 state,
1058 sf->block,
1059 sf->block_ctx,
1060 next_bytenr,
1061 sf->limit_nesting,
1062 &sf->next_block_ctx,
1063 &sf->next_block,
1064 force_iodone_flag,
1065 &sf->num_copies,
1066 &sf->mirror_num,
1067 disk_key,
3cae210f
QW
1068 btrfs_root_generation(
1069 &root_item));
5db02760
SB
1070 if (sf->error)
1071 goto one_stack_frame_backwards;
1072
1073 if (NULL != sf->next_block) {
1074 struct btrfs_header *const next_hdr =
1075 (struct btrfs_header *)
e06baab4 1076 sf->next_block_ctx.datav[0];
5db02760
SB
1077
1078 next_stack =
1079 btrfsic_stack_frame_alloc();
1080 if (NULL == next_stack) {
98806b44 1081 sf->error = -1;
5db02760
SB
1082 btrfsic_release_block_ctx(
1083 &sf->
1084 next_block_ctx);
1085 goto one_stack_frame_backwards;
1086 }
1087
1088 next_stack->i = -1;
1089 next_stack->block = sf->next_block;
1090 next_stack->block_ctx =
1091 &sf->next_block_ctx;
1092 next_stack->next_block = NULL;
1093 next_stack->hdr = next_hdr;
1094 next_stack->limit_nesting =
1095 sf->limit_nesting - 1;
1096 next_stack->prev = sf;
1097 sf = next_stack;
1098 goto continue_with_new_stack_frame;
1099 }
1100 } else if (BTRFS_EXTENT_DATA_KEY == type &&
1101 state->include_extent_data) {
1102 sf->error = btrfsic_handle_extent_data(
1103 state,
1104 sf->block,
1105 sf->block_ctx,
1106 item_offset,
1107 force_iodone_flag);
1108 if (sf->error)
1109 goto one_stack_frame_backwards;
1110 }
1111
1112 goto continue_with_current_leaf_stack_frame;
1113 }
1114 } else {
1115 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1116
1117 if (-1 == sf->i) {
3cae210f 1118 sf->nr = btrfs_stack_header_nritems(&nodehdr->header);
5db02760
SB
1119
1120 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1121 printk(KERN_INFO "node %llu level %d items %d"
1122 " generation %llu owner %llu\n",
5db02760
SB
1123 sf->block_ctx->start,
1124 nodehdr->header.level, sf->nr,
3cae210f
QW
1125 btrfs_stack_header_generation(
1126 &nodehdr->header),
3cae210f
QW
1127 btrfs_stack_header_owner(
1128 &nodehdr->header));
5db02760
SB
1129 }
1130
1131continue_with_current_node_stack_frame:
1132 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1133 sf->i++;
1134 sf->num_copies = 0;
1135 }
1136
1137 if (sf->i < sf->nr) {
e06baab4
SB
1138 struct btrfs_key_ptr key_ptr;
1139 u32 key_ptr_offset;
1140 u64 next_bytenr;
1141
1142 key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1143 (uintptr_t)nodehdr;
1144 if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1145 sf->block_ctx->len) {
1146 printk(KERN_INFO
1147 "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1148 sf->block_ctx->start,
1149 sf->block_ctx->dev->name);
1150 goto one_stack_frame_backwards;
1151 }
1152 btrfsic_read_from_block_data(
1153 sf->block_ctx, &key_ptr, key_ptr_offset,
1154 sizeof(struct btrfs_key_ptr));
3cae210f 1155 next_bytenr = btrfs_stack_key_blockptr(&key_ptr);
5db02760
SB
1156
1157 sf->error = btrfsic_create_link_to_next_block(
1158 state,
1159 sf->block,
1160 sf->block_ctx,
1161 next_bytenr,
1162 sf->limit_nesting,
1163 &sf->next_block_ctx,
1164 &sf->next_block,
1165 force_iodone_flag,
1166 &sf->num_copies,
1167 &sf->mirror_num,
e06baab4 1168 &key_ptr.key,
3cae210f 1169 btrfs_stack_key_generation(&key_ptr));
5db02760
SB
1170 if (sf->error)
1171 goto one_stack_frame_backwards;
1172
1173 if (NULL != sf->next_block) {
1174 struct btrfs_header *const next_hdr =
1175 (struct btrfs_header *)
e06baab4 1176 sf->next_block_ctx.datav[0];
5db02760
SB
1177
1178 next_stack = btrfsic_stack_frame_alloc();
98806b44
SB
1179 if (NULL == next_stack) {
1180 sf->error = -1;
5db02760 1181 goto one_stack_frame_backwards;
98806b44 1182 }
5db02760
SB
1183
1184 next_stack->i = -1;
1185 next_stack->block = sf->next_block;
1186 next_stack->block_ctx = &sf->next_block_ctx;
1187 next_stack->next_block = NULL;
1188 next_stack->hdr = next_hdr;
1189 next_stack->limit_nesting =
1190 sf->limit_nesting - 1;
1191 next_stack->prev = sf;
1192 sf = next_stack;
1193 goto continue_with_new_stack_frame;
1194 }
1195
1196 goto continue_with_current_node_stack_frame;
1197 }
1198 }
1199
1200one_stack_frame_backwards:
1201 if (NULL != sf->prev) {
1202 struct btrfsic_stack_frame *const prev = sf->prev;
1203
1204 /* the one for the initial block is freed in the caller */
1205 btrfsic_release_block_ctx(sf->block_ctx);
1206
1207 if (sf->error) {
1208 prev->error = sf->error;
1209 btrfsic_stack_frame_free(sf);
1210 sf = prev;
1211 goto one_stack_frame_backwards;
1212 }
1213
1214 btrfsic_stack_frame_free(sf);
1215 sf = prev;
1216 goto continue_with_new_stack_frame;
1217 } else {
1218 BUG_ON(&initial_stack_frame != sf);
1219 }
1220
1221 return sf->error;
1222}
1223
e06baab4
SB
1224static void btrfsic_read_from_block_data(
1225 struct btrfsic_block_data_ctx *block_ctx,
1226 void *dstv, u32 offset, size_t len)
1227{
1228 size_t cur;
1229 size_t offset_in_page;
1230 char *kaddr;
1231 char *dst = (char *)dstv;
1232 size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1233 unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1234
1235 WARN_ON(offset + len > block_ctx->len);
778746b5 1236 offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1);
e06baab4
SB
1237
1238 while (len > 0) {
1239 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
ed6078f7 1240 BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_CACHE_SIZE));
e06baab4
SB
1241 kaddr = block_ctx->datav[i];
1242 memcpy(dst, kaddr + offset_in_page, cur);
1243
1244 dst += cur;
1245 len -= cur;
1246 offset_in_page = 0;
1247 i++;
1248 }
1249}
1250
5db02760
SB
1251static int btrfsic_create_link_to_next_block(
1252 struct btrfsic_state *state,
1253 struct btrfsic_block *block,
1254 struct btrfsic_block_data_ctx *block_ctx,
1255 u64 next_bytenr,
1256 int limit_nesting,
1257 struct btrfsic_block_data_ctx *next_block_ctx,
1258 struct btrfsic_block **next_blockp,
1259 int force_iodone_flag,
1260 int *num_copiesp, int *mirror_nump,
1261 struct btrfs_disk_key *disk_key,
1262 u64 parent_generation)
1263{
1264 struct btrfsic_block *next_block = NULL;
1265 int ret;
1266 struct btrfsic_block_link *l;
1267 int did_alloc_block_link;
1268 int block_was_created;
1269
1270 *next_blockp = NULL;
1271 if (0 == *num_copiesp) {
1272 *num_copiesp =
5d964051 1273 btrfs_num_copies(state->root->fs_info,
e06baab4 1274 next_bytenr, state->metablock_size);
5db02760
SB
1275 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1276 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
c1c9ff7c 1277 next_bytenr, *num_copiesp);
5db02760
SB
1278 *mirror_nump = 1;
1279 }
1280
1281 if (*mirror_nump > *num_copiesp)
1282 return 0;
1283
1284 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1285 printk(KERN_INFO
1286 "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1287 *mirror_nump);
1288 ret = btrfsic_map_block(state, next_bytenr,
e06baab4 1289 state->metablock_size,
5db02760
SB
1290 next_block_ctx, *mirror_nump);
1291 if (ret) {
1292 printk(KERN_INFO
1293 "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
c1c9ff7c 1294 next_bytenr, *mirror_nump);
5db02760
SB
1295 btrfsic_release_block_ctx(next_block_ctx);
1296 *next_blockp = NULL;
1297 return -1;
1298 }
1299
1300 next_block = btrfsic_block_lookup_or_add(state,
1301 next_block_ctx, "referenced ",
1302 1, force_iodone_flag,
1303 !force_iodone_flag,
1304 *mirror_nump,
1305 &block_was_created);
1306 if (NULL == next_block) {
1307 btrfsic_release_block_ctx(next_block_ctx);
1308 *next_blockp = NULL;
1309 return -1;
1310 }
1311 if (block_was_created) {
1312 l = NULL;
1313 next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
1314 } else {
cf90c59e
SB
1315 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
1316 if (next_block->logical_bytenr != next_bytenr &&
1317 !(!next_block->is_metadata &&
1318 0 == next_block->logical_bytenr))
1319 printk(KERN_INFO
1320 "Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
1321 next_bytenr, next_block_ctx->dev->name,
1322 next_block_ctx->dev_bytenr, *mirror_nump,
1323 btrfsic_get_block_type(state,
1324 next_block),
1325 next_block->logical_bytenr);
1326 else
1327 printk(KERN_INFO
1328 "Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1329 next_bytenr, next_block_ctx->dev->name,
1330 next_block_ctx->dev_bytenr, *mirror_nump,
1331 btrfsic_get_block_type(state,
1332 next_block));
1333 }
5db02760
SB
1334 next_block->logical_bytenr = next_bytenr;
1335
1336 next_block->mirror_num = *mirror_nump;
1337 l = btrfsic_block_link_hashtable_lookup(
1338 next_block_ctx->dev->bdev,
1339 next_block_ctx->dev_bytenr,
1340 block_ctx->dev->bdev,
1341 block_ctx->dev_bytenr,
1342 &state->block_link_hashtable);
1343 }
1344
1345 next_block->disk_key = *disk_key;
1346 if (NULL == l) {
1347 l = btrfsic_block_link_alloc();
1348 if (NULL == l) {
1349 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
1350 btrfsic_release_block_ctx(next_block_ctx);
1351 *next_blockp = NULL;
1352 return -1;
1353 }
1354
1355 did_alloc_block_link = 1;
1356 l->block_ref_to = next_block;
1357 l->block_ref_from = block;
1358 l->ref_cnt = 1;
1359 l->parent_generation = parent_generation;
1360
1361 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1362 btrfsic_print_add_link(state, l);
1363
1364 list_add(&l->node_ref_to, &block->ref_to_list);
1365 list_add(&l->node_ref_from, &next_block->ref_from_list);
1366
1367 btrfsic_block_link_hashtable_add(l,
1368 &state->block_link_hashtable);
1369 } else {
1370 did_alloc_block_link = 0;
1371 if (0 == limit_nesting) {
1372 l->ref_cnt++;
1373 l->parent_generation = parent_generation;
1374 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1375 btrfsic_print_add_link(state, l);
1376 }
1377 }
1378
1379 if (limit_nesting > 0 && did_alloc_block_link) {
1380 ret = btrfsic_read_block(state, next_block_ctx);
e06baab4 1381 if (ret < (int)next_block_ctx->len) {
5db02760
SB
1382 printk(KERN_INFO
1383 "btrfsic: read block @logical %llu failed!\n",
c1c9ff7c 1384 next_bytenr);
5db02760
SB
1385 btrfsic_release_block_ctx(next_block_ctx);
1386 *next_blockp = NULL;
1387 return -1;
1388 }
1389
1390 *next_blockp = next_block;
1391 } else {
1392 *next_blockp = NULL;
1393 }
1394 (*mirror_nump)++;
1395
1396 return 0;
1397}
1398
1399static int btrfsic_handle_extent_data(
1400 struct btrfsic_state *state,
1401 struct btrfsic_block *block,
1402 struct btrfsic_block_data_ctx *block_ctx,
1403 u32 item_offset, int force_iodone_flag)
1404{
1405 int ret;
e06baab4
SB
1406 struct btrfs_file_extent_item file_extent_item;
1407 u64 file_extent_item_offset;
1408 u64 next_bytenr;
1409 u64 num_bytes;
1410 u64 generation;
5db02760
SB
1411 struct btrfsic_block_link *l;
1412
e06baab4
SB
1413 file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1414 item_offset;
86ff7ffc
SB
1415 if (file_extent_item_offset +
1416 offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1417 block_ctx->len) {
1418 printk(KERN_INFO
1419 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1420 block_ctx->start, block_ctx->dev->name);
1421 return -1;
1422 }
1423
1424 btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1425 file_extent_item_offset,
1426 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1427 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
3cae210f 1428 btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) {
86ff7ffc
SB
1429 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1430 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1431 file_extent_item.type,
3cae210f
QW
1432 btrfs_stack_file_extent_disk_bytenr(
1433 &file_extent_item));
86ff7ffc
SB
1434 return 0;
1435 }
1436
e06baab4
SB
1437 if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1438 block_ctx->len) {
1439 printk(KERN_INFO
1440 "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1441 block_ctx->start, block_ctx->dev->name);
1442 return -1;
1443 }
1444 btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1445 file_extent_item_offset,
1446 sizeof(struct btrfs_file_extent_item));
e20d6c5b
JB
1447 next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item);
1448 if (btrfs_stack_file_extent_compression(&file_extent_item) ==
1449 BTRFS_COMPRESS_NONE) {
1450 next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item);
1451 num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
1452 } else {
1453 num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item);
1454 }
3cae210f 1455 generation = btrfs_stack_file_extent_generation(&file_extent_item);
e06baab4 1456
5db02760
SB
1457 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1458 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1459 " offset = %llu, num_bytes = %llu\n",
e06baab4 1460 file_extent_item.type,
3cae210f 1461 btrfs_stack_file_extent_disk_bytenr(&file_extent_item),
3cae210f 1462 btrfs_stack_file_extent_offset(&file_extent_item),
c1c9ff7c 1463 num_bytes);
5db02760
SB
1464 while (num_bytes > 0) {
1465 u32 chunk_len;
1466 int num_copies;
1467 int mirror_num;
1468
e06baab4
SB
1469 if (num_bytes > state->datablock_size)
1470 chunk_len = state->datablock_size;
5db02760
SB
1471 else
1472 chunk_len = num_bytes;
1473
1474 num_copies =
5d964051 1475 btrfs_num_copies(state->root->fs_info,
e06baab4 1476 next_bytenr, state->datablock_size);
5db02760
SB
1477 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1478 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
c1c9ff7c 1479 next_bytenr, num_copies);
5db02760
SB
1480 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1481 struct btrfsic_block_data_ctx next_block_ctx;
1482 struct btrfsic_block *next_block;
1483 int block_was_created;
1484
1485 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1486 printk(KERN_INFO "btrfsic_handle_extent_data("
1487 "mirror_num=%d)\n", mirror_num);
1488 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1489 printk(KERN_INFO
1490 "\tdisk_bytenr = %llu, num_bytes %u\n",
c1c9ff7c 1491 next_bytenr, chunk_len);
5db02760
SB
1492 ret = btrfsic_map_block(state, next_bytenr,
1493 chunk_len, &next_block_ctx,
1494 mirror_num);
1495 if (ret) {
1496 printk(KERN_INFO
1497 "btrfsic: btrfsic_map_block(@%llu,"
1498 " mirror=%d) failed!\n",
c1c9ff7c 1499 next_bytenr, mirror_num);
5db02760
SB
1500 return -1;
1501 }
1502
1503 next_block = btrfsic_block_lookup_or_add(
1504 state,
1505 &next_block_ctx,
1506 "referenced ",
1507 0,
1508 force_iodone_flag,
1509 !force_iodone_flag,
1510 mirror_num,
1511 &block_was_created);
1512 if (NULL == next_block) {
1513 printk(KERN_INFO
1514 "btrfsic: error, kmalloc failed!\n");
1515 btrfsic_release_block_ctx(&next_block_ctx);
1516 return -1;
1517 }
1518 if (!block_was_created) {
cf90c59e
SB
1519 if ((state->print_mask &
1520 BTRFSIC_PRINT_MASK_VERBOSE) &&
1521 next_block->logical_bytenr != next_bytenr &&
5db02760
SB
1522 !(!next_block->is_metadata &&
1523 0 == next_block->logical_bytenr)) {
1524 printk(KERN_INFO
1525 "Referenced block"
1526 " @%llu (%s/%llu/%d)"
1527 " found in hash table, D,"
1528 " bytenr mismatch"
1529 " (!= stored %llu).\n",
c1c9ff7c 1530 next_bytenr,
5db02760 1531 next_block_ctx.dev->name,
5db02760
SB
1532 next_block_ctx.dev_bytenr,
1533 mirror_num,
5db02760
SB
1534 next_block->logical_bytenr);
1535 }
1536 next_block->logical_bytenr = next_bytenr;
1537 next_block->mirror_num = mirror_num;
1538 }
1539
1540 l = btrfsic_block_link_lookup_or_add(state,
1541 &next_block_ctx,
1542 next_block, block,
1543 generation);
1544 btrfsic_release_block_ctx(&next_block_ctx);
1545 if (NULL == l)
1546 return -1;
1547 }
1548
1549 next_bytenr += chunk_len;
1550 num_bytes -= chunk_len;
1551 }
1552
1553 return 0;
1554}
1555
1556static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1557 struct btrfsic_block_data_ctx *block_ctx_out,
1558 int mirror_num)
1559{
1560 int ret;
1561 u64 length;
1562 struct btrfs_bio *multi = NULL;
1563 struct btrfs_device *device;
1564
1565 length = len;
3ec706c8 1566 ret = btrfs_map_block(state->root->fs_info, READ,
5db02760
SB
1567 bytenr, &length, &multi, mirror_num);
1568
61891923
SB
1569 if (ret) {
1570 block_ctx_out->start = 0;
1571 block_ctx_out->dev_bytenr = 0;
1572 block_ctx_out->len = 0;
1573 block_ctx_out->dev = NULL;
1574 block_ctx_out->datav = NULL;
1575 block_ctx_out->pagev = NULL;
1576 block_ctx_out->mem_to_free = NULL;
1577
1578 return ret;
1579 }
1580
5db02760
SB
1581 device = multi->stripes[0].dev;
1582 block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1583 block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1584 block_ctx_out->start = bytenr;
1585 block_ctx_out->len = len;
e06baab4
SB
1586 block_ctx_out->datav = NULL;
1587 block_ctx_out->pagev = NULL;
1588 block_ctx_out->mem_to_free = NULL;
5db02760 1589
61891923 1590 kfree(multi);
5db02760
SB
1591 if (NULL == block_ctx_out->dev) {
1592 ret = -ENXIO;
1593 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1594 }
1595
1596 return ret;
1597}
1598
5db02760
SB
1599static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1600{
e06baab4
SB
1601 if (block_ctx->mem_to_free) {
1602 unsigned int num_pages;
1603
1604 BUG_ON(!block_ctx->datav);
1605 BUG_ON(!block_ctx->pagev);
1606 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1607 PAGE_CACHE_SHIFT;
1608 while (num_pages > 0) {
1609 num_pages--;
1610 if (block_ctx->datav[num_pages]) {
1611 kunmap(block_ctx->pagev[num_pages]);
1612 block_ctx->datav[num_pages] = NULL;
1613 }
1614 if (block_ctx->pagev[num_pages]) {
1615 __free_page(block_ctx->pagev[num_pages]);
1616 block_ctx->pagev[num_pages] = NULL;
1617 }
1618 }
1619
1620 kfree(block_ctx->mem_to_free);
1621 block_ctx->mem_to_free = NULL;
1622 block_ctx->pagev = NULL;
1623 block_ctx->datav = NULL;
5db02760
SB
1624 }
1625}
1626
1627static int btrfsic_read_block(struct btrfsic_state *state,
1628 struct btrfsic_block_data_ctx *block_ctx)
1629{
e06baab4
SB
1630 unsigned int num_pages;
1631 unsigned int i;
1632 u64 dev_bytenr;
1633 int ret;
1634
1635 BUG_ON(block_ctx->datav);
1636 BUG_ON(block_ctx->pagev);
1637 BUG_ON(block_ctx->mem_to_free);
1638 if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
5db02760
SB
1639 printk(KERN_INFO
1640 "btrfsic: read_block() with unaligned bytenr %llu\n",
c1c9ff7c 1641 block_ctx->dev_bytenr);
5db02760
SB
1642 return -1;
1643 }
e06baab4
SB
1644
1645 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1646 PAGE_CACHE_SHIFT;
1647 block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1648 sizeof(*block_ctx->pagev)) *
1649 num_pages, GFP_NOFS);
1650 if (!block_ctx->mem_to_free)
0b8d8ce0 1651 return -ENOMEM;
e06baab4
SB
1652 block_ctx->datav = block_ctx->mem_to_free;
1653 block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1654 for (i = 0; i < num_pages; i++) {
1655 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1656 if (!block_ctx->pagev[i])
1657 return -1;
5db02760
SB
1658 }
1659
e06baab4
SB
1660 dev_bytenr = block_ctx->dev_bytenr;
1661 for (i = 0; i < num_pages;) {
1662 struct bio *bio;
1663 unsigned int j;
e06baab4 1664
9be3395b 1665 bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
e06baab4
SB
1666 if (!bio) {
1667 printk(KERN_INFO
1668 "btrfsic: bio_alloc() for %u pages failed!\n",
1669 num_pages - i);
1670 return -1;
1671 }
1672 bio->bi_bdev = block_ctx->dev->bdev;
4f024f37 1673 bio->bi_iter.bi_sector = dev_bytenr >> 9;
e06baab4
SB
1674
1675 for (j = i; j < num_pages; j++) {
1676 ret = bio_add_page(bio, block_ctx->pagev[j],
1677 PAGE_CACHE_SIZE, 0);
1678 if (PAGE_CACHE_SIZE != ret)
1679 break;
1680 }
1681 if (j == i) {
1682 printk(KERN_INFO
1683 "btrfsic: error, failed to add a single page!\n");
1684 return -1;
1685 }
33879d45 1686 if (submit_bio_wait(READ, bio)) {
e06baab4
SB
1687 printk(KERN_INFO
1688 "btrfsic: read error at logical %llu dev %s!\n",
1689 block_ctx->start, block_ctx->dev->name);
1690 bio_put(bio);
1691 return -1;
1692 }
1693 bio_put(bio);
1694 dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1695 i = j;
1696 }
1697 for (i = 0; i < num_pages; i++) {
1698 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1699 if (!block_ctx->datav[i]) {
1700 printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1701 block_ctx->dev->name);
1702 return -1;
1703 }
1704 }
5db02760
SB
1705
1706 return block_ctx->len;
1707}
1708
1709static void btrfsic_dump_database(struct btrfsic_state *state)
1710{
b69f2bef 1711 const struct btrfsic_block *b_all;
5db02760
SB
1712
1713 BUG_ON(NULL == state);
1714
1715 printk(KERN_INFO "all_blocks_list:\n");
b69f2bef
GT
1716 list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) {
1717 const struct btrfsic_block_link *l;
5db02760
SB
1718
1719 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
1720 btrfsic_get_block_type(state, b_all),
c1c9ff7c
GU
1721 b_all->logical_bytenr, b_all->dev_state->name,
1722 b_all->dev_bytenr, b_all->mirror_num);
5db02760 1723
b69f2bef 1724 list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) {
5db02760
SB
1725 printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1726 " refers %u* to"
1727 " %c @%llu (%s/%llu/%d)\n",
1728 btrfsic_get_block_type(state, b_all),
c1c9ff7c
GU
1729 b_all->logical_bytenr, b_all->dev_state->name,
1730 b_all->dev_bytenr, b_all->mirror_num,
5db02760
SB
1731 l->ref_cnt,
1732 btrfsic_get_block_type(state, l->block_ref_to),
5db02760
SB
1733 l->block_ref_to->logical_bytenr,
1734 l->block_ref_to->dev_state->name,
c1c9ff7c 1735 l->block_ref_to->dev_bytenr,
5db02760
SB
1736 l->block_ref_to->mirror_num);
1737 }
1738
b69f2bef 1739 list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) {
5db02760
SB
1740 printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1741 " is ref %u* from"
1742 " %c @%llu (%s/%llu/%d)\n",
1743 btrfsic_get_block_type(state, b_all),
c1c9ff7c
GU
1744 b_all->logical_bytenr, b_all->dev_state->name,
1745 b_all->dev_bytenr, b_all->mirror_num,
5db02760
SB
1746 l->ref_cnt,
1747 btrfsic_get_block_type(state, l->block_ref_from),
5db02760
SB
1748 l->block_ref_from->logical_bytenr,
1749 l->block_ref_from->dev_state->name,
5db02760
SB
1750 l->block_ref_from->dev_bytenr,
1751 l->block_ref_from->mirror_num);
1752 }
1753
1754 printk(KERN_INFO "\n");
1755 }
1756}
1757
1758/*
1759 * Test whether the disk block contains a tree block (leaf or node)
1760 * (note that this test fails for the super block)
1761 */
1762static int btrfsic_test_for_metadata(struct btrfsic_state *state,
e06baab4 1763 char **datav, unsigned int num_pages)
5db02760
SB
1764{
1765 struct btrfs_header *h;
1766 u8 csum[BTRFS_CSUM_SIZE];
1767 u32 crc = ~(u32)0;
e06baab4 1768 unsigned int i;
5db02760 1769
e06baab4
SB
1770 if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1771 return 1; /* not metadata */
1772 num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1773 h = (struct btrfs_header *)datav[0];
5db02760
SB
1774
1775 if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
e06baab4 1776 return 1;
5db02760 1777
e06baab4
SB
1778 for (i = 0; i < num_pages; i++) {
1779 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1780 size_t sublen = i ? PAGE_CACHE_SIZE :
1781 (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1782
0b947aff 1783 crc = btrfs_crc32c(crc, data, sublen);
e06baab4 1784 }
5db02760
SB
1785 btrfs_csum_final(crc, csum);
1786 if (memcmp(csum, h->csum, state->csum_size))
e06baab4 1787 return 1;
5db02760 1788
e06baab4 1789 return 0; /* is metadata */
5db02760
SB
1790}
1791
1792static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
e06baab4
SB
1793 u64 dev_bytenr, char **mapped_datav,
1794 unsigned int num_pages,
1795 struct bio *bio, int *bio_is_patched,
5db02760
SB
1796 struct buffer_head *bh,
1797 int submit_bio_bh_rw)
1798{
1799 int is_metadata;
1800 struct btrfsic_block *block;
1801 struct btrfsic_block_data_ctx block_ctx;
1802 int ret;
1803 struct btrfsic_state *state = dev_state->state;
1804 struct block_device *bdev = dev_state->bdev;
e06baab4 1805 unsigned int processed_len;
5db02760 1806
5db02760
SB
1807 if (NULL != bio_is_patched)
1808 *bio_is_patched = 0;
1809
e06baab4
SB
1810again:
1811 if (num_pages == 0)
1812 return;
1813
1814 processed_len = 0;
1815 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1816 num_pages));
1817
5db02760
SB
1818 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1819 &state->block_hashtable);
1820 if (NULL != block) {
0b485143 1821 u64 bytenr = 0;
b69f2bef 1822 struct btrfsic_block_link *l, *tmp;
5db02760
SB
1823
1824 if (block->is_superblock) {
3cae210f
QW
1825 bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
1826 mapped_datav[0]);
e06baab4
SB
1827 if (num_pages * PAGE_CACHE_SIZE <
1828 BTRFS_SUPER_INFO_SIZE) {
1829 printk(KERN_INFO
1830 "btrfsic: cannot work with too short bios!\n");
1831 return;
1832 }
5db02760 1833 is_metadata = 1;
e06baab4
SB
1834 BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1835 processed_len = BTRFS_SUPER_INFO_SIZE;
5db02760
SB
1836 if (state->print_mask &
1837 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1838 printk(KERN_INFO
1839 "[before new superblock is written]:\n");
1840 btrfsic_dump_tree_sub(state, block, 0);
1841 }
1842 }
1843 if (is_metadata) {
1844 if (!block->is_superblock) {
e06baab4
SB
1845 if (num_pages * PAGE_CACHE_SIZE <
1846 state->metablock_size) {
1847 printk(KERN_INFO
1848 "btrfsic: cannot work with too short bios!\n");
1849 return;
1850 }
1851 processed_len = state->metablock_size;
3cae210f
QW
1852 bytenr = btrfs_stack_header_bytenr(
1853 (struct btrfs_header *)
1854 mapped_datav[0]);
5db02760
SB
1855 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1856 dev_state,
e06baab4 1857 dev_bytenr);
5db02760 1858 }
cf90c59e
SB
1859 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
1860 if (block->logical_bytenr != bytenr &&
1861 !(!block->is_metadata &&
1862 block->logical_bytenr == 0))
1863 printk(KERN_INFO
1864 "Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
1865 bytenr, dev_state->name,
1866 dev_bytenr,
1867 block->mirror_num,
1868 btrfsic_get_block_type(state,
1869 block),
1870 block->logical_bytenr);
1871 else
1872 printk(KERN_INFO
1873 "Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1874 bytenr, dev_state->name,
1875 dev_bytenr, block->mirror_num,
1876 btrfsic_get_block_type(state,
1877 block));
1878 }
301993a4 1879 block->logical_bytenr = bytenr;
5db02760 1880 } else {
e06baab4
SB
1881 if (num_pages * PAGE_CACHE_SIZE <
1882 state->datablock_size) {
1883 printk(KERN_INFO
1884 "btrfsic: cannot work with too short bios!\n");
1885 return;
1886 }
1887 processed_len = state->datablock_size;
5db02760
SB
1888 bytenr = block->logical_bytenr;
1889 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1890 printk(KERN_INFO
1891 "Written block @%llu (%s/%llu/%d)"
1892 " found in hash table, %c.\n",
c1c9ff7c 1893 bytenr, dev_state->name, dev_bytenr,
5db02760
SB
1894 block->mirror_num,
1895 btrfsic_get_block_type(state, block));
1896 }
1897
1898 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1899 printk(KERN_INFO
1900 "ref_to_list: %cE, ref_from_list: %cE\n",
1901 list_empty(&block->ref_to_list) ? ' ' : '!',
1902 list_empty(&block->ref_from_list) ? ' ' : '!');
1903 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1904 printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1905 " @%llu (%s/%llu/%d), old(gen=%llu,"
1906 " objectid=%llu, type=%d, offset=%llu),"
1907 " new(gen=%llu),"
1908 " which is referenced by most recent superblock"
1909 " (superblockgen=%llu)!\n",
c1c9ff7c
GU
1910 btrfsic_get_block_type(state, block), bytenr,
1911 dev_state->name, dev_bytenr, block->mirror_num,
1912 block->generation,
3cae210f 1913 btrfs_disk_key_objectid(&block->disk_key),
5db02760 1914 block->disk_key.type,
3cae210f 1915 btrfs_disk_key_offset(&block->disk_key),
3cae210f
QW
1916 btrfs_stack_header_generation(
1917 (struct btrfs_header *) mapped_datav[0]),
5db02760
SB
1918 state->max_superblock_generation);
1919 btrfsic_dump_tree(state);
1920 }
1921
1922 if (!block->is_iodone && !block->never_written) {
1923 printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1924 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
1925 " which is not yet iodone!\n",
c1c9ff7c
GU
1926 btrfsic_get_block_type(state, block), bytenr,
1927 dev_state->name, dev_bytenr, block->mirror_num,
1928 block->generation,
3cae210f
QW
1929 btrfs_stack_header_generation(
1930 (struct btrfs_header *)
1931 mapped_datav[0]));
5db02760
SB
1932 /* it would not be safe to go on */
1933 btrfsic_dump_tree(state);
e06baab4 1934 goto continue_loop;
5db02760
SB
1935 }
1936
1937 /*
1938 * Clear all references of this block. Do not free
1939 * the block itself even if it is not referenced anymore
1940 * because it still carries valuable information
1941 * like whether it was ever written and IO completed.
1942 */
b69f2bef
GT
1943 list_for_each_entry_safe(l, tmp, &block->ref_to_list,
1944 node_ref_to) {
5db02760
SB
1945 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1946 btrfsic_print_rem_link(state, l);
1947 l->ref_cnt--;
1948 if (0 == l->ref_cnt) {
1949 list_del(&l->node_ref_to);
1950 list_del(&l->node_ref_from);
1951 btrfsic_block_link_hashtable_remove(l);
1952 btrfsic_block_link_free(l);
1953 }
1954 }
1955
5db02760
SB
1956 block_ctx.dev = dev_state;
1957 block_ctx.dev_bytenr = dev_bytenr;
f382e465
SB
1958 block_ctx.start = bytenr;
1959 block_ctx.len = processed_len;
1960 block_ctx.pagev = NULL;
1961 block_ctx.mem_to_free = NULL;
1962 block_ctx.datav = mapped_datav;
5db02760
SB
1963
1964 if (is_metadata || state->include_extent_data) {
1965 block->never_written = 0;
1966 block->iodone_w_error = 0;
1967 if (NULL != bio) {
1968 block->is_iodone = 0;
1969 BUG_ON(NULL == bio_is_patched);
1970 if (!*bio_is_patched) {
1971 block->orig_bio_bh_private =
1972 bio->bi_private;
1973 block->orig_bio_bh_end_io.bio =
1974 bio->bi_end_io;
1975 block->next_in_same_bio = NULL;
1976 bio->bi_private = block;
1977 bio->bi_end_io = btrfsic_bio_end_io;
1978 *bio_is_patched = 1;
1979 } else {
1980 struct btrfsic_block *chained_block =
1981 (struct btrfsic_block *)
1982 bio->bi_private;
1983
1984 BUG_ON(NULL == chained_block);
1985 block->orig_bio_bh_private =
1986 chained_block->orig_bio_bh_private;
1987 block->orig_bio_bh_end_io.bio =
1988 chained_block->orig_bio_bh_end_io.
1989 bio;
1990 block->next_in_same_bio = chained_block;
1991 bio->bi_private = block;
1992 }
1993 } else if (NULL != bh) {
1994 block->is_iodone = 0;
1995 block->orig_bio_bh_private = bh->b_private;
1996 block->orig_bio_bh_end_io.bh = bh->b_end_io;
1997 block->next_in_same_bio = NULL;
1998 bh->b_private = block;
1999 bh->b_end_io = btrfsic_bh_end_io;
2000 } else {
2001 block->is_iodone = 1;
2002 block->orig_bio_bh_private = NULL;
2003 block->orig_bio_bh_end_io.bio = NULL;
2004 block->next_in_same_bio = NULL;
2005 }
2006 }
2007
2008 block->flush_gen = dev_state->last_flush_gen + 1;
2009 block->submit_bio_bh_rw = submit_bio_bh_rw;
2010 if (is_metadata) {
2011 block->logical_bytenr = bytenr;
2012 block->is_metadata = 1;
2013 if (block->is_superblock) {
e06baab4
SB
2014 BUG_ON(PAGE_CACHE_SIZE !=
2015 BTRFS_SUPER_INFO_SIZE);
5db02760
SB
2016 ret = btrfsic_process_written_superblock(
2017 state,
2018 block,
2019 (struct btrfs_super_block *)
e06baab4 2020 mapped_datav[0]);
5db02760
SB
2021 if (state->print_mask &
2022 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
2023 printk(KERN_INFO
2024 "[after new superblock is written]:\n");
2025 btrfsic_dump_tree_sub(state, block, 0);
2026 }
2027 } else {
2028 block->mirror_num = 0; /* unknown */
2029 ret = btrfsic_process_metablock(
2030 state,
2031 block,
2032 &block_ctx,
5db02760
SB
2033 0, 0);
2034 }
2035 if (ret)
2036 printk(KERN_INFO
2037 "btrfsic: btrfsic_process_metablock"
2038 "(root @%llu) failed!\n",
c1c9ff7c 2039 dev_bytenr);
5db02760
SB
2040 } else {
2041 block->is_metadata = 0;
2042 block->mirror_num = 0; /* unknown */
2043 block->generation = BTRFSIC_GENERATION_UNKNOWN;
2044 if (!state->include_extent_data
2045 && list_empty(&block->ref_from_list)) {
2046 /*
2047 * disk block is overwritten with extent
2048 * data (not meta data) and we are configured
2049 * to not include extent data: take the
2050 * chance and free the block's memory
2051 */
2052 btrfsic_block_hashtable_remove(block);
2053 list_del(&block->all_blocks_node);
2054 btrfsic_block_free(block);
2055 }
2056 }
2057 btrfsic_release_block_ctx(&block_ctx);
2058 } else {
2059 /* block has not been found in hash table */
2060 u64 bytenr;
2061
2062 if (!is_metadata) {
e06baab4 2063 processed_len = state->datablock_size;
5db02760
SB
2064 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2065 printk(KERN_INFO "Written block (%s/%llu/?)"
2066 " !found in hash table, D.\n",
c1c9ff7c 2067 dev_state->name, dev_bytenr);
e06baab4
SB
2068 if (!state->include_extent_data) {
2069 /* ignore that written D block */
2070 goto continue_loop;
2071 }
5db02760
SB
2072
2073 /* this is getting ugly for the
2074 * include_extent_data case... */
2075 bytenr = 0; /* unknown */
5db02760 2076 } else {
e06baab4 2077 processed_len = state->metablock_size;
3cae210f
QW
2078 bytenr = btrfs_stack_header_bytenr(
2079 (struct btrfs_header *)
2080 mapped_datav[0]);
5db02760 2081 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
e06baab4 2082 dev_bytenr);
5db02760
SB
2083 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2084 printk(KERN_INFO
2085 "Written block @%llu (%s/%llu/?)"
2086 " !found in hash table, M.\n",
c1c9ff7c 2087 bytenr, dev_state->name, dev_bytenr);
5db02760 2088 }
f382e465 2089
5db02760
SB
2090 block_ctx.dev = dev_state;
2091 block_ctx.dev_bytenr = dev_bytenr;
f382e465
SB
2092 block_ctx.start = bytenr;
2093 block_ctx.len = processed_len;
2094 block_ctx.pagev = NULL;
2095 block_ctx.mem_to_free = NULL;
2096 block_ctx.datav = mapped_datav;
5db02760
SB
2097
2098 block = btrfsic_block_alloc();
2099 if (NULL == block) {
2100 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2101 btrfsic_release_block_ctx(&block_ctx);
e06baab4 2102 goto continue_loop;
5db02760
SB
2103 }
2104 block->dev_state = dev_state;
2105 block->dev_bytenr = dev_bytenr;
2106 block->logical_bytenr = bytenr;
2107 block->is_metadata = is_metadata;
2108 block->never_written = 0;
2109 block->iodone_w_error = 0;
2110 block->mirror_num = 0; /* unknown */
2111 block->flush_gen = dev_state->last_flush_gen + 1;
2112 block->submit_bio_bh_rw = submit_bio_bh_rw;
2113 if (NULL != bio) {
2114 block->is_iodone = 0;
2115 BUG_ON(NULL == bio_is_patched);
2116 if (!*bio_is_patched) {
2117 block->orig_bio_bh_private = bio->bi_private;
2118 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2119 block->next_in_same_bio = NULL;
2120 bio->bi_private = block;
2121 bio->bi_end_io = btrfsic_bio_end_io;
2122 *bio_is_patched = 1;
2123 } else {
2124 struct btrfsic_block *chained_block =
2125 (struct btrfsic_block *)
2126 bio->bi_private;
2127
2128 BUG_ON(NULL == chained_block);
2129 block->orig_bio_bh_private =
2130 chained_block->orig_bio_bh_private;
2131 block->orig_bio_bh_end_io.bio =
2132 chained_block->orig_bio_bh_end_io.bio;
2133 block->next_in_same_bio = chained_block;
2134 bio->bi_private = block;
2135 }
2136 } else if (NULL != bh) {
2137 block->is_iodone = 0;
2138 block->orig_bio_bh_private = bh->b_private;
2139 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2140 block->next_in_same_bio = NULL;
2141 bh->b_private = block;
2142 bh->b_end_io = btrfsic_bh_end_io;
2143 } else {
2144 block->is_iodone = 1;
2145 block->orig_bio_bh_private = NULL;
2146 block->orig_bio_bh_end_io.bio = NULL;
2147 block->next_in_same_bio = NULL;
2148 }
2149 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2150 printk(KERN_INFO
2151 "New written %c-block @%llu (%s/%llu/%d)\n",
2152 is_metadata ? 'M' : 'D',
c1c9ff7c
GU
2153 block->logical_bytenr, block->dev_state->name,
2154 block->dev_bytenr, block->mirror_num);
5db02760
SB
2155 list_add(&block->all_blocks_node, &state->all_blocks_list);
2156 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2157
2158 if (is_metadata) {
2159 ret = btrfsic_process_metablock(state, block,
e06baab4 2160 &block_ctx, 0, 0);
5db02760
SB
2161 if (ret)
2162 printk(KERN_INFO
2163 "btrfsic: process_metablock(root @%llu)"
2164 " failed!\n",
c1c9ff7c 2165 dev_bytenr);
5db02760
SB
2166 }
2167 btrfsic_release_block_ctx(&block_ctx);
2168 }
e06baab4
SB
2169
2170continue_loop:
2171 BUG_ON(!processed_len);
2172 dev_bytenr += processed_len;
2173 mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2174 num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2175 goto again;
5db02760
SB
2176}
2177
4246a0b6 2178static void btrfsic_bio_end_io(struct bio *bp)
5db02760
SB
2179{
2180 struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
2181 int iodone_w_error;
2182
2183 /* mutex is not held! This is not save if IO is not yet completed
2184 * on umount */
2185 iodone_w_error = 0;
4246a0b6 2186 if (bp->bi_error)
5db02760
SB
2187 iodone_w_error = 1;
2188
2189 BUG_ON(NULL == block);
2190 bp->bi_private = block->orig_bio_bh_private;
2191 bp->bi_end_io = block->orig_bio_bh_end_io.bio;
2192
2193 do {
2194 struct btrfsic_block *next_block;
2195 struct btrfsic_dev_state *const dev_state = block->dev_state;
2196
2197 if ((dev_state->state->print_mask &
2198 BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2199 printk(KERN_INFO
2200 "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
4246a0b6 2201 bp->bi_error,
5db02760 2202 btrfsic_get_block_type(dev_state->state, block),
c1c9ff7c
GU
2203 block->logical_bytenr, dev_state->name,
2204 block->dev_bytenr, block->mirror_num);
5db02760
SB
2205 next_block = block->next_in_same_bio;
2206 block->iodone_w_error = iodone_w_error;
2207 if (block->submit_bio_bh_rw & REQ_FLUSH) {
2208 dev_state->last_flush_gen++;
2209 if ((dev_state->state->print_mask &
2210 BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2211 printk(KERN_INFO
2212 "bio_end_io() new %s flush_gen=%llu\n",
2213 dev_state->name,
5db02760
SB
2214 dev_state->last_flush_gen);
2215 }
2216 if (block->submit_bio_bh_rw & REQ_FUA)
2217 block->flush_gen = 0; /* FUA completed means block is
2218 * on disk */
2219 block->is_iodone = 1; /* for FLUSH, this releases the block */
2220 block = next_block;
2221 } while (NULL != block);
2222
4246a0b6 2223 bp->bi_end_io(bp);
5db02760
SB
2224}
2225
2226static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2227{
2228 struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
2229 int iodone_w_error = !uptodate;
2230 struct btrfsic_dev_state *dev_state;
2231
2232 BUG_ON(NULL == block);
2233 dev_state = block->dev_state;
2234 if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2235 printk(KERN_INFO
2236 "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2237 iodone_w_error,
2238 btrfsic_get_block_type(dev_state->state, block),
c1c9ff7c
GU
2239 block->logical_bytenr, block->dev_state->name,
2240 block->dev_bytenr, block->mirror_num);
5db02760
SB
2241
2242 block->iodone_w_error = iodone_w_error;
2243 if (block->submit_bio_bh_rw & REQ_FLUSH) {
2244 dev_state->last_flush_gen++;
2245 if ((dev_state->state->print_mask &
2246 BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2247 printk(KERN_INFO
2248 "bh_end_io() new %s flush_gen=%llu\n",
c1c9ff7c 2249 dev_state->name, dev_state->last_flush_gen);
5db02760
SB
2250 }
2251 if (block->submit_bio_bh_rw & REQ_FUA)
2252 block->flush_gen = 0; /* FUA completed means block is on disk */
2253
2254 bh->b_private = block->orig_bio_bh_private;
2255 bh->b_end_io = block->orig_bio_bh_end_io.bh;
2256 block->is_iodone = 1; /* for FLUSH, this releases the block */
2257 bh->b_end_io(bh, uptodate);
2258}
2259
2260static int btrfsic_process_written_superblock(
2261 struct btrfsic_state *state,
2262 struct btrfsic_block *const superblock,
2263 struct btrfs_super_block *const super_hdr)
2264{
2265 int pass;
2266
2267 superblock->generation = btrfs_super_generation(super_hdr);
2268 if (!(superblock->generation > state->max_superblock_generation ||
2269 0 == state->max_superblock_generation)) {
2270 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2271 printk(KERN_INFO
2272 "btrfsic: superblock @%llu (%s/%llu/%d)"
2273 " with old gen %llu <= %llu\n",
c1c9ff7c 2274 superblock->logical_bytenr,
5db02760 2275 superblock->dev_state->name,
c1c9ff7c 2276 superblock->dev_bytenr, superblock->mirror_num,
5db02760 2277 btrfs_super_generation(super_hdr),
5db02760
SB
2278 state->max_superblock_generation);
2279 } else {
2280 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2281 printk(KERN_INFO
2282 "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2283 " with new gen %llu > %llu\n",
c1c9ff7c 2284 superblock->logical_bytenr,
5db02760 2285 superblock->dev_state->name,
c1c9ff7c 2286 superblock->dev_bytenr, superblock->mirror_num,
5db02760 2287 btrfs_super_generation(super_hdr),
5db02760
SB
2288 state->max_superblock_generation);
2289
2290 state->max_superblock_generation =
2291 btrfs_super_generation(super_hdr);
2292 state->latest_superblock = superblock;
2293 }
2294
2295 for (pass = 0; pass < 3; pass++) {
2296 int ret;
2297 u64 next_bytenr;
2298 struct btrfsic_block *next_block;
2299 struct btrfsic_block_data_ctx tmp_next_block_ctx;
2300 struct btrfsic_block_link *l;
2301 int num_copies;
2302 int mirror_num;
2303 const char *additional_string = NULL;
35a3621b 2304 struct btrfs_disk_key tmp_disk_key = {0};
5db02760 2305
3cae210f
QW
2306 btrfs_set_disk_key_objectid(&tmp_disk_key,
2307 BTRFS_ROOT_ITEM_KEY);
2308 btrfs_set_disk_key_objectid(&tmp_disk_key, 0);
5db02760
SB
2309
2310 switch (pass) {
2311 case 0:
3cae210f
QW
2312 btrfs_set_disk_key_objectid(&tmp_disk_key,
2313 BTRFS_ROOT_TREE_OBJECTID);
5db02760
SB
2314 additional_string = "root ";
2315 next_bytenr = btrfs_super_root(super_hdr);
2316 if (state->print_mask &
2317 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
c1c9ff7c 2318 printk(KERN_INFO "root@%llu\n", next_bytenr);
5db02760
SB
2319 break;
2320 case 1:
3cae210f
QW
2321 btrfs_set_disk_key_objectid(&tmp_disk_key,
2322 BTRFS_CHUNK_TREE_OBJECTID);
5db02760
SB
2323 additional_string = "chunk ";
2324 next_bytenr = btrfs_super_chunk_root(super_hdr);
2325 if (state->print_mask &
2326 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
c1c9ff7c 2327 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
5db02760
SB
2328 break;
2329 case 2:
3cae210f
QW
2330 btrfs_set_disk_key_objectid(&tmp_disk_key,
2331 BTRFS_TREE_LOG_OBJECTID);
5db02760
SB
2332 additional_string = "log ";
2333 next_bytenr = btrfs_super_log_root(super_hdr);
2334 if (0 == next_bytenr)
2335 continue;
2336 if (state->print_mask &
2337 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
c1c9ff7c 2338 printk(KERN_INFO "log@%llu\n", next_bytenr);
5db02760
SB
2339 break;
2340 }
2341
2342 num_copies =
5d964051 2343 btrfs_num_copies(state->root->fs_info,
e06baab4 2344 next_bytenr, BTRFS_SUPER_INFO_SIZE);
5db02760
SB
2345 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2346 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
c1c9ff7c 2347 next_bytenr, num_copies);
5db02760
SB
2348 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2349 int was_created;
2350
2351 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2352 printk(KERN_INFO
2353 "btrfsic_process_written_superblock("
2354 "mirror_num=%d)\n", mirror_num);
e06baab4
SB
2355 ret = btrfsic_map_block(state, next_bytenr,
2356 BTRFS_SUPER_INFO_SIZE,
5db02760
SB
2357 &tmp_next_block_ctx,
2358 mirror_num);
2359 if (ret) {
2360 printk(KERN_INFO
2361 "btrfsic: btrfsic_map_block(@%llu,"
2362 " mirror=%d) failed!\n",
c1c9ff7c 2363 next_bytenr, mirror_num);
5db02760
SB
2364 return -1;
2365 }
2366
2367 next_block = btrfsic_block_lookup_or_add(
2368 state,
2369 &tmp_next_block_ctx,
2370 additional_string,
2371 1, 0, 1,
2372 mirror_num,
2373 &was_created);
2374 if (NULL == next_block) {
2375 printk(KERN_INFO
2376 "btrfsic: error, kmalloc failed!\n");
2377 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2378 return -1;
2379 }
2380
2381 next_block->disk_key = tmp_disk_key;
2382 if (was_created)
2383 next_block->generation =
2384 BTRFSIC_GENERATION_UNKNOWN;
2385 l = btrfsic_block_link_lookup_or_add(
2386 state,
2387 &tmp_next_block_ctx,
2388 next_block,
2389 superblock,
2390 BTRFSIC_GENERATION_UNKNOWN);
2391 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2392 if (NULL == l)
2393 return -1;
2394 }
2395 }
2396
fae7f21c 2397 if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)))
5db02760 2398 btrfsic_dump_tree(state);
5db02760
SB
2399
2400 return 0;
2401}
2402
2403static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2404 struct btrfsic_block *const block,
2405 int recursion_level)
2406{
b69f2bef 2407 const struct btrfsic_block_link *l;
5db02760
SB
2408 int ret = 0;
2409
2410 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2411 /*
2412 * Note that this situation can happen and does not
2413 * indicate an error in regular cases. It happens
2414 * when disk blocks are freed and later reused.
2415 * The check-integrity module is not aware of any
2416 * block free operations, it just recognizes block
2417 * write operations. Therefore it keeps the linkage
2418 * information for a block until a block is
2419 * rewritten. This can temporarily cause incorrect
2420 * and even circular linkage informations. This
2421 * causes no harm unless such blocks are referenced
2422 * by the most recent super block.
2423 */
2424 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2425 printk(KERN_INFO
2426 "btrfsic: abort cyclic linkage (case 1).\n");
2427
2428 return ret;
2429 }
2430
2431 /*
2432 * This algorithm is recursive because the amount of used stack
2433 * space is very small and the max recursion depth is limited.
2434 */
b69f2bef 2435 list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
5db02760
SB
2436 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2437 printk(KERN_INFO
2438 "rl=%d, %c @%llu (%s/%llu/%d)"
2439 " %u* refers to %c @%llu (%s/%llu/%d)\n",
2440 recursion_level,
2441 btrfsic_get_block_type(state, block),
c1c9ff7c
GU
2442 block->logical_bytenr, block->dev_state->name,
2443 block->dev_bytenr, block->mirror_num,
5db02760
SB
2444 l->ref_cnt,
2445 btrfsic_get_block_type(state, l->block_ref_to),
5db02760
SB
2446 l->block_ref_to->logical_bytenr,
2447 l->block_ref_to->dev_state->name,
c1c9ff7c 2448 l->block_ref_to->dev_bytenr,
5db02760
SB
2449 l->block_ref_to->mirror_num);
2450 if (l->block_ref_to->never_written) {
2451 printk(KERN_INFO "btrfs: attempt to write superblock"
2452 " which references block %c @%llu (%s/%llu/%d)"
2453 " which is never written!\n",
2454 btrfsic_get_block_type(state, l->block_ref_to),
5db02760
SB
2455 l->block_ref_to->logical_bytenr,
2456 l->block_ref_to->dev_state->name,
c1c9ff7c 2457 l->block_ref_to->dev_bytenr,
5db02760
SB
2458 l->block_ref_to->mirror_num);
2459 ret = -1;
2460 } else if (!l->block_ref_to->is_iodone) {
2461 printk(KERN_INFO "btrfs: attempt to write superblock"
2462 " which references block %c @%llu (%s/%llu/%d)"
2463 " which is not yet iodone!\n",
2464 btrfsic_get_block_type(state, l->block_ref_to),
5db02760
SB
2465 l->block_ref_to->logical_bytenr,
2466 l->block_ref_to->dev_state->name,
c1c9ff7c 2467 l->block_ref_to->dev_bytenr,
5db02760
SB
2468 l->block_ref_to->mirror_num);
2469 ret = -1;
62856a9b
SB
2470 } else if (l->block_ref_to->iodone_w_error) {
2471 printk(KERN_INFO "btrfs: attempt to write superblock"
2472 " which references block %c @%llu (%s/%llu/%d)"
2473 " which has write error!\n",
2474 btrfsic_get_block_type(state, l->block_ref_to),
62856a9b
SB
2475 l->block_ref_to->logical_bytenr,
2476 l->block_ref_to->dev_state->name,
c1c9ff7c 2477 l->block_ref_to->dev_bytenr,
62856a9b
SB
2478 l->block_ref_to->mirror_num);
2479 ret = -1;
5db02760
SB
2480 } else if (l->parent_generation !=
2481 l->block_ref_to->generation &&
2482 BTRFSIC_GENERATION_UNKNOWN !=
2483 l->parent_generation &&
2484 BTRFSIC_GENERATION_UNKNOWN !=
2485 l->block_ref_to->generation) {
2486 printk(KERN_INFO "btrfs: attempt to write superblock"
2487 " which references block %c @%llu (%s/%llu/%d)"
2488 " with generation %llu !="
2489 " parent generation %llu!\n",
2490 btrfsic_get_block_type(state, l->block_ref_to),
5db02760
SB
2491 l->block_ref_to->logical_bytenr,
2492 l->block_ref_to->dev_state->name,
c1c9ff7c 2493 l->block_ref_to->dev_bytenr,
5db02760 2494 l->block_ref_to->mirror_num,
c1c9ff7c
GU
2495 l->block_ref_to->generation,
2496 l->parent_generation);
5db02760
SB
2497 ret = -1;
2498 } else if (l->block_ref_to->flush_gen >
2499 l->block_ref_to->dev_state->last_flush_gen) {
2500 printk(KERN_INFO "btrfs: attempt to write superblock"
2501 " which references block %c @%llu (%s/%llu/%d)"
2502 " which is not flushed out of disk's write cache"
2503 " (block flush_gen=%llu,"
2504 " dev->flush_gen=%llu)!\n",
2505 btrfsic_get_block_type(state, l->block_ref_to),
5db02760
SB
2506 l->block_ref_to->logical_bytenr,
2507 l->block_ref_to->dev_state->name,
c1c9ff7c
GU
2508 l->block_ref_to->dev_bytenr,
2509 l->block_ref_to->mirror_num, block->flush_gen,
5db02760
SB
2510 l->block_ref_to->dev_state->last_flush_gen);
2511 ret = -1;
2512 } else if (-1 == btrfsic_check_all_ref_blocks(state,
2513 l->block_ref_to,
2514 recursion_level +
2515 1)) {
2516 ret = -1;
2517 }
2518 }
2519
2520 return ret;
2521}
2522
2523static int btrfsic_is_block_ref_by_superblock(
2524 const struct btrfsic_state *state,
2525 const struct btrfsic_block *block,
2526 int recursion_level)
2527{
b69f2bef 2528 const struct btrfsic_block_link *l;
5db02760
SB
2529
2530 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2531 /* refer to comment at "abort cyclic linkage (case 1)" */
2532 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2533 printk(KERN_INFO
2534 "btrfsic: abort cyclic linkage (case 2).\n");
2535
2536 return 0;
2537 }
2538
2539 /*
2540 * This algorithm is recursive because the amount of used stack space
2541 * is very small and the max recursion depth is limited.
2542 */
b69f2bef 2543 list_for_each_entry(l, &block->ref_from_list, node_ref_from) {
5db02760
SB
2544 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2545 printk(KERN_INFO
2546 "rl=%d, %c @%llu (%s/%llu/%d)"
2547 " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2548 recursion_level,
2549 btrfsic_get_block_type(state, block),
c1c9ff7c
GU
2550 block->logical_bytenr, block->dev_state->name,
2551 block->dev_bytenr, block->mirror_num,
5db02760
SB
2552 l->ref_cnt,
2553 btrfsic_get_block_type(state, l->block_ref_from),
5db02760
SB
2554 l->block_ref_from->logical_bytenr,
2555 l->block_ref_from->dev_state->name,
5db02760
SB
2556 l->block_ref_from->dev_bytenr,
2557 l->block_ref_from->mirror_num);
2558 if (l->block_ref_from->is_superblock &&
2559 state->latest_superblock->dev_bytenr ==
2560 l->block_ref_from->dev_bytenr &&
2561 state->latest_superblock->dev_state->bdev ==
2562 l->block_ref_from->dev_state->bdev)
2563 return 1;
2564 else if (btrfsic_is_block_ref_by_superblock(state,
2565 l->block_ref_from,
2566 recursion_level +
2567 1))
2568 return 1;
2569 }
2570
2571 return 0;
2572}
2573
2574static void btrfsic_print_add_link(const struct btrfsic_state *state,
2575 const struct btrfsic_block_link *l)
2576{
2577 printk(KERN_INFO
2578 "Add %u* link from %c @%llu (%s/%llu/%d)"
2579 " to %c @%llu (%s/%llu/%d).\n",
2580 l->ref_cnt,
2581 btrfsic_get_block_type(state, l->block_ref_from),
c1c9ff7c 2582 l->block_ref_from->logical_bytenr,
5db02760 2583 l->block_ref_from->dev_state->name,
c1c9ff7c 2584 l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
5db02760 2585 btrfsic_get_block_type(state, l->block_ref_to),
c1c9ff7c
GU
2586 l->block_ref_to->logical_bytenr,
2587 l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
5db02760
SB
2588 l->block_ref_to->mirror_num);
2589}
2590
2591static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2592 const struct btrfsic_block_link *l)
2593{
2594 printk(KERN_INFO
2595 "Rem %u* link from %c @%llu (%s/%llu/%d)"
2596 " to %c @%llu (%s/%llu/%d).\n",
2597 l->ref_cnt,
2598 btrfsic_get_block_type(state, l->block_ref_from),
c1c9ff7c 2599 l->block_ref_from->logical_bytenr,
5db02760 2600 l->block_ref_from->dev_state->name,
c1c9ff7c 2601 l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
5db02760 2602 btrfsic_get_block_type(state, l->block_ref_to),
c1c9ff7c
GU
2603 l->block_ref_to->logical_bytenr,
2604 l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
5db02760
SB
2605 l->block_ref_to->mirror_num);
2606}
2607
2608static char btrfsic_get_block_type(const struct btrfsic_state *state,
2609 const struct btrfsic_block *block)
2610{
2611 if (block->is_superblock &&
2612 state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2613 state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2614 return 'S';
2615 else if (block->is_superblock)
2616 return 's';
2617 else if (block->is_metadata)
2618 return 'M';
2619 else
2620 return 'D';
2621}
2622
2623static void btrfsic_dump_tree(const struct btrfsic_state *state)
2624{
2625 btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
2626}
2627
2628static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2629 const struct btrfsic_block *block,
2630 int indent_level)
2631{
b69f2bef 2632 const struct btrfsic_block_link *l;
5db02760
SB
2633 int indent_add;
2634 static char buf[80];
2635 int cursor_position;
2636
2637 /*
2638 * Should better fill an on-stack buffer with a complete line and
2639 * dump it at once when it is time to print a newline character.
2640 */
2641
2642 /*
2643 * This algorithm is recursive because the amount of used stack space
2644 * is very small and the max recursion depth is limited.
2645 */
2646 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2647 btrfsic_get_block_type(state, block),
c1c9ff7c
GU
2648 block->logical_bytenr, block->dev_state->name,
2649 block->dev_bytenr, block->mirror_num);
5db02760
SB
2650 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2651 printk("[...]\n");
2652 return;
2653 }
2654 printk(buf);
2655 indent_level += indent_add;
2656 if (list_empty(&block->ref_to_list)) {
2657 printk("\n");
2658 return;
2659 }
2660 if (block->mirror_num > 1 &&
2661 !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2662 printk(" [...]\n");
2663 return;
2664 }
2665
2666 cursor_position = indent_level;
b69f2bef 2667 list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
5db02760
SB
2668 while (cursor_position < indent_level) {
2669 printk(" ");
2670 cursor_position++;
2671 }
2672 if (l->ref_cnt > 1)
2673 indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2674 else
2675 indent_add = sprintf(buf, " --> ");
2676 if (indent_level + indent_add >
2677 BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2678 printk("[...]\n");
2679 cursor_position = 0;
2680 continue;
2681 }
2682
2683 printk(buf);
2684
2685 btrfsic_dump_tree_sub(state, l->block_ref_to,
2686 indent_level + indent_add);
2687 cursor_position = 0;
2688 }
2689}
2690
2691static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2692 struct btrfsic_state *state,
2693 struct btrfsic_block_data_ctx *next_block_ctx,
2694 struct btrfsic_block *next_block,
2695 struct btrfsic_block *from_block,
2696 u64 parent_generation)
2697{
2698 struct btrfsic_block_link *l;
2699
2700 l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2701 next_block_ctx->dev_bytenr,
2702 from_block->dev_state->bdev,
2703 from_block->dev_bytenr,
2704 &state->block_link_hashtable);
2705 if (NULL == l) {
2706 l = btrfsic_block_link_alloc();
2707 if (NULL == l) {
2708 printk(KERN_INFO
2709 "btrfsic: error, kmalloc" " failed!\n");
2710 return NULL;
2711 }
2712
2713 l->block_ref_to = next_block;
2714 l->block_ref_from = from_block;
2715 l->ref_cnt = 1;
2716 l->parent_generation = parent_generation;
2717
2718 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2719 btrfsic_print_add_link(state, l);
2720
2721 list_add(&l->node_ref_to, &from_block->ref_to_list);
2722 list_add(&l->node_ref_from, &next_block->ref_from_list);
2723
2724 btrfsic_block_link_hashtable_add(l,
2725 &state->block_link_hashtable);
2726 } else {
2727 l->ref_cnt++;
2728 l->parent_generation = parent_generation;
2729 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2730 btrfsic_print_add_link(state, l);
2731 }
2732
2733 return l;
2734}
2735
2736static struct btrfsic_block *btrfsic_block_lookup_or_add(
2737 struct btrfsic_state *state,
2738 struct btrfsic_block_data_ctx *block_ctx,
2739 const char *additional_string,
2740 int is_metadata,
2741 int is_iodone,
2742 int never_written,
2743 int mirror_num,
2744 int *was_created)
2745{
2746 struct btrfsic_block *block;
2747
2748 block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2749 block_ctx->dev_bytenr,
2750 &state->block_hashtable);
2751 if (NULL == block) {
2752 struct btrfsic_dev_state *dev_state;
2753
2754 block = btrfsic_block_alloc();
2755 if (NULL == block) {
2756 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2757 return NULL;
2758 }
2759 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2760 if (NULL == dev_state) {
2761 printk(KERN_INFO
2762 "btrfsic: error, lookup dev_state failed!\n");
2763 btrfsic_block_free(block);
2764 return NULL;
2765 }
2766 block->dev_state = dev_state;
2767 block->dev_bytenr = block_ctx->dev_bytenr;
2768 block->logical_bytenr = block_ctx->start;
2769 block->is_metadata = is_metadata;
2770 block->is_iodone = is_iodone;
2771 block->never_written = never_written;
2772 block->mirror_num = mirror_num;
2773 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2774 printk(KERN_INFO
2775 "New %s%c-block @%llu (%s/%llu/%d)\n",
2776 additional_string,
2777 btrfsic_get_block_type(state, block),
c1c9ff7c
GU
2778 block->logical_bytenr, dev_state->name,
2779 block->dev_bytenr, mirror_num);
5db02760
SB
2780 list_add(&block->all_blocks_node, &state->all_blocks_list);
2781 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2782 if (NULL != was_created)
2783 *was_created = 1;
2784 } else {
2785 if (NULL != was_created)
2786 *was_created = 0;
2787 }
2788
2789 return block;
2790}
2791
2792static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2793 u64 bytenr,
2794 struct btrfsic_dev_state *dev_state,
e06baab4 2795 u64 dev_bytenr)
5db02760
SB
2796{
2797 int num_copies;
2798 int mirror_num;
2799 int ret;
2800 struct btrfsic_block_data_ctx block_ctx;
2801 int match = 0;
2802
5d964051 2803 num_copies = btrfs_num_copies(state->root->fs_info,
e06baab4 2804 bytenr, state->metablock_size);
5db02760
SB
2805
2806 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
e06baab4 2807 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
5db02760
SB
2808 &block_ctx, mirror_num);
2809 if (ret) {
2810 printk(KERN_INFO "btrfsic:"
2811 " btrfsic_map_block(logical @%llu,"
2812 " mirror %d) failed!\n",
c1c9ff7c 2813 bytenr, mirror_num);
5db02760
SB
2814 continue;
2815 }
2816
2817 if (dev_state->bdev == block_ctx.dev->bdev &&
2818 dev_bytenr == block_ctx.dev_bytenr) {
2819 match++;
2820 btrfsic_release_block_ctx(&block_ctx);
2821 break;
2822 }
2823 btrfsic_release_block_ctx(&block_ctx);
2824 }
2825
fae7f21c 2826 if (WARN_ON(!match)) {
5db02760
SB
2827 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2828 " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2829 " phys_bytenr=%llu)!\n",
c1c9ff7c 2830 bytenr, dev_state->name, dev_bytenr);
5db02760 2831 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
e06baab4
SB
2832 ret = btrfsic_map_block(state, bytenr,
2833 state->metablock_size,
5db02760
SB
2834 &block_ctx, mirror_num);
2835 if (ret)
2836 continue;
2837
2838 printk(KERN_INFO "Read logical bytenr @%llu maps to"
2839 " (%s/%llu/%d)\n",
c1c9ff7c
GU
2840 bytenr, block_ctx.dev->name,
2841 block_ctx.dev_bytenr, mirror_num);
5db02760 2842 }
5db02760
SB
2843 }
2844}
2845
2846static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
2847 struct block_device *bdev)
2848{
2849 struct btrfsic_dev_state *ds;
2850
2851 ds = btrfsic_dev_state_hashtable_lookup(bdev,
2852 &btrfsic_dev_state_hashtable);
2853 return ds;
2854}
2855
2856int btrfsic_submit_bh(int rw, struct buffer_head *bh)
2857{
2858 struct btrfsic_dev_state *dev_state;
2859
2860 if (!btrfsic_is_initialized)
2861 return submit_bh(rw, bh);
2862
2863 mutex_lock(&btrfsic_mutex);
2864 /* since btrfsic_submit_bh() might also be called before
2865 * btrfsic_mount(), this might return NULL */
2866 dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
2867
2868 /* Only called to write the superblock (incl. FLUSH/FUA) */
2869 if (NULL != dev_state &&
2870 (rw & WRITE) && bh->b_size > 0) {
2871 u64 dev_bytenr;
2872
2873 dev_bytenr = 4096 * bh->b_blocknr;
2874 if (dev_state->state->print_mask &
2875 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2876 printk(KERN_INFO
fce29364 2877 "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu),"
8d78eb16 2878 " size=%zu, data=%p, bdev=%p)\n",
fce29364 2879 rw, (unsigned long long)bh->b_blocknr,
8d78eb16 2880 dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
5db02760 2881 btrfsic_process_written_block(dev_state, dev_bytenr,
e06baab4 2882 &bh->b_data, 1, NULL,
5db02760
SB
2883 NULL, bh, rw);
2884 } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
2885 if (dev_state->state->print_mask &
2886 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2887 printk(KERN_INFO
e06baab4 2888 "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
5db02760
SB
2889 rw, bh->b_bdev);
2890 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
2891 if ((dev_state->state->print_mask &
2892 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
2893 BTRFSIC_PRINT_MASK_VERBOSE)))
2894 printk(KERN_INFO
2895 "btrfsic_submit_bh(%s) with FLUSH"
2896 " but dummy block already in use"
2897 " (ignored)!\n",
2898 dev_state->name);
2899 } else {
2900 struct btrfsic_block *const block =
2901 &dev_state->dummy_block_for_bio_bh_flush;
2902
2903 block->is_iodone = 0;
2904 block->never_written = 0;
2905 block->iodone_w_error = 0;
2906 block->flush_gen = dev_state->last_flush_gen + 1;
2907 block->submit_bio_bh_rw = rw;
2908 block->orig_bio_bh_private = bh->b_private;
2909 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2910 block->next_in_same_bio = NULL;
2911 bh->b_private = block;
2912 bh->b_end_io = btrfsic_bh_end_io;
2913 }
2914 }
2915 mutex_unlock(&btrfsic_mutex);
2916 return submit_bh(rw, bh);
2917}
2918
33879d45 2919static void __btrfsic_submit_bio(int rw, struct bio *bio)
5db02760
SB
2920{
2921 struct btrfsic_dev_state *dev_state;
2922
33879d45 2923 if (!btrfsic_is_initialized)
5db02760 2924 return;
5db02760
SB
2925
2926 mutex_lock(&btrfsic_mutex);
2927 /* since btrfsic_submit_bio() is also called before
2928 * btrfsic_mount(), this might return NULL */
2929 dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
2930 if (NULL != dev_state &&
2931 (rw & WRITE) && NULL != bio->bi_io_vec) {
2932 unsigned int i;
2933 u64 dev_bytenr;
56d140f5 2934 u64 cur_bytenr;
5db02760 2935 int bio_is_patched;
e06baab4 2936 char **mapped_datav;
5db02760 2937
4f024f37 2938 dev_bytenr = 512 * bio->bi_iter.bi_sector;
5db02760
SB
2939 bio_is_patched = 0;
2940 if (dev_state->state->print_mask &
2941 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2942 printk(KERN_INFO
2943 "submit_bio(rw=0x%x, bi_vcnt=%u,"
fce29364
GU
2944 " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
2945 rw, bio->bi_vcnt,
4f024f37
KO
2946 (unsigned long long)bio->bi_iter.bi_sector,
2947 dev_bytenr, bio->bi_bdev);
5db02760 2948
31e818fe
DS
2949 mapped_datav = kmalloc_array(bio->bi_vcnt,
2950 sizeof(*mapped_datav), GFP_NOFS);
e06baab4
SB
2951 if (!mapped_datav)
2952 goto leave;
56d140f5 2953 cur_bytenr = dev_bytenr;
5db02760 2954 for (i = 0; i < bio->bi_vcnt; i++) {
e06baab4
SB
2955 BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
2956 mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
2957 if (!mapped_datav[i]) {
2958 while (i > 0) {
2959 i--;
2960 kunmap(bio->bi_io_vec[i].bv_page);
2961 }
2962 kfree(mapped_datav);
2963 goto leave;
2964 }
56d140f5
SB
2965 if (dev_state->state->print_mask &
2966 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
5db02760 2967 printk(KERN_INFO
56d140f5
SB
2968 "#%u: bytenr=%llu, len=%u, offset=%u\n",
2969 i, cur_bytenr, bio->bi_io_vec[i].bv_len,
5db02760 2970 bio->bi_io_vec[i].bv_offset);
56d140f5 2971 cur_bytenr += bio->bi_io_vec[i].bv_len;
e06baab4
SB
2972 }
2973 btrfsic_process_written_block(dev_state, dev_bytenr,
2974 mapped_datav, bio->bi_vcnt,
2975 bio, &bio_is_patched,
2976 NULL, rw);
2977 while (i > 0) {
2978 i--;
5db02760 2979 kunmap(bio->bi_io_vec[i].bv_page);
5db02760 2980 }
e06baab4 2981 kfree(mapped_datav);
5db02760
SB
2982 } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
2983 if (dev_state->state->print_mask &
2984 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2985 printk(KERN_INFO
e06baab4 2986 "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
5db02760
SB
2987 rw, bio->bi_bdev);
2988 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
2989 if ((dev_state->state->print_mask &
2990 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
2991 BTRFSIC_PRINT_MASK_VERBOSE)))
2992 printk(KERN_INFO
2993 "btrfsic_submit_bio(%s) with FLUSH"
2994 " but dummy block already in use"
2995 " (ignored)!\n",
2996 dev_state->name);
2997 } else {
2998 struct btrfsic_block *const block =
2999 &dev_state->dummy_block_for_bio_bh_flush;
3000
3001 block->is_iodone = 0;
3002 block->never_written = 0;
3003 block->iodone_w_error = 0;
3004 block->flush_gen = dev_state->last_flush_gen + 1;
3005 block->submit_bio_bh_rw = rw;
3006 block->orig_bio_bh_private = bio->bi_private;
3007 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
3008 block->next_in_same_bio = NULL;
3009 bio->bi_private = block;
3010 bio->bi_end_io = btrfsic_bio_end_io;
3011 }
3012 }
e06baab4 3013leave:
5db02760 3014 mutex_unlock(&btrfsic_mutex);
33879d45 3015}
5db02760 3016
33879d45
KO
/*
 * Run the integrity checker's pre-submission hook on @bio, then submit
 * it with submit_bio() using the same @rw flags.
 */
void btrfsic_submit_bio(int rw, struct bio *bio)
{
	/* Let the checker look at (and possibly hook) the bio first. */
	__btrfsic_submit_bio(rw, bio);

	submit_bio(rw, bio);
}
3022
33879d45
KO
/*
 * Run the integrity checker's pre-submission hook on @bio, then submit
 * it with submit_bio_wait() and return that call's result.
 */
int btrfsic_submit_bio_wait(int rw, struct bio *bio)
{
	int ret;

	/* Let the checker look at (and possibly hook) the bio first. */
	__btrfsic_submit_bio(rw, bio);

	ret = submit_bio_wait(rw, bio);
	return ret;
}
3028
5db02760
SB
3029int btrfsic_mount(struct btrfs_root *root,
3030 struct btrfs_fs_devices *fs_devices,
3031 int including_extent_data, u32 print_mask)
3032{
3033 int ret;
3034 struct btrfsic_state *state;
3035 struct list_head *dev_head = &fs_devices->devices;
3036 struct btrfs_device *device;
3037
e06baab4
SB
3038 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3039 printk(KERN_INFO
3040 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
778746b5 3041 root->nodesize, PAGE_CACHE_SIZE);
e06baab4
SB
3042 return -1;
3043 }
e06baab4
SB
3044 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3045 printk(KERN_INFO
3046 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
778746b5 3047 root->sectorsize, PAGE_CACHE_SIZE);
e06baab4
SB
3048 return -1;
3049 }
6b3a4d60
SW
3050 state = kzalloc(sizeof(*state), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
3051 if (!state) {
3052 state = vzalloc(sizeof(*state));
3053 if (!state) {
3054 printk(KERN_INFO "btrfs check-integrity: vzalloc() failed!\n");
3055 return -1;
3056 }
5db02760
SB
3057 }
3058
3059 if (!btrfsic_is_initialized) {
3060 mutex_init(&btrfsic_mutex);
3061 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3062 btrfsic_is_initialized = 1;
3063 }
3064 mutex_lock(&btrfsic_mutex);
3065 state->root = root;
3066 state->print_mask = print_mask;
3067 state->include_extent_data = including_extent_data;
3068 state->csum_size = 0;
e06baab4
SB
3069 state->metablock_size = root->nodesize;
3070 state->datablock_size = root->sectorsize;
5db02760
SB
3071 INIT_LIST_HEAD(&state->all_blocks_list);
3072 btrfsic_block_hashtable_init(&state->block_hashtable);
3073 btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3074 state->max_superblock_generation = 0;
3075 state->latest_superblock = NULL;
3076
3077 list_for_each_entry(device, dev_head, dev_list) {
3078 struct btrfsic_dev_state *ds;
3079 char *p;
3080
3081 if (!device->bdev || !device->name)
3082 continue;
3083
3084 ds = btrfsic_dev_state_alloc();
3085 if (NULL == ds) {
3086 printk(KERN_INFO
3087 "btrfs check-integrity: kmalloc() failed!\n");
3088 mutex_unlock(&btrfsic_mutex);
3089 return -1;
3090 }
3091 ds->bdev = device->bdev;
3092 ds->state = state;
3093 bdevname(ds->bdev, ds->name);
3094 ds->name[BDEVNAME_SIZE - 1] = '\0';
3095 for (p = ds->name; *p != '\0'; p++);
3096 while (p > ds->name && *p != '/')
3097 p--;
3098 if (*p == '/')
3099 p++;
3100 strlcpy(ds->name, p, sizeof(ds->name));
3101 btrfsic_dev_state_hashtable_add(ds,
3102 &btrfsic_dev_state_hashtable);
3103 }
3104
3105 ret = btrfsic_process_superblock(state, fs_devices);
3106 if (0 != ret) {
3107 mutex_unlock(&btrfsic_mutex);
3108 btrfsic_unmount(root, fs_devices);
3109 return ret;
3110 }
3111
3112 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3113 btrfsic_dump_database(state);
3114 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3115 btrfsic_dump_tree(state);
3116
3117 mutex_unlock(&btrfsic_mutex);
3118 return 0;
3119}
3120
3121void btrfsic_unmount(struct btrfs_root *root,
3122 struct btrfs_fs_devices *fs_devices)
3123{
b69f2bef 3124 struct btrfsic_block *b_all, *tmp_all;
5db02760
SB
3125 struct btrfsic_state *state;
3126 struct list_head *dev_head = &fs_devices->devices;
3127 struct btrfs_device *device;
3128
3129 if (!btrfsic_is_initialized)
3130 return;
3131
3132 mutex_lock(&btrfsic_mutex);
3133
3134 state = NULL;
3135 list_for_each_entry(device, dev_head, dev_list) {
3136 struct btrfsic_dev_state *ds;
3137
3138 if (!device->bdev || !device->name)
3139 continue;
3140
3141 ds = btrfsic_dev_state_hashtable_lookup(
3142 device->bdev,
3143 &btrfsic_dev_state_hashtable);
3144 if (NULL != ds) {
3145 state = ds->state;
3146 btrfsic_dev_state_hashtable_remove(ds);
3147 btrfsic_dev_state_free(ds);
3148 }
3149 }
3150
3151 if (NULL == state) {
3152 printk(KERN_INFO
3153 "btrfsic: error, cannot find state information"
3154 " on umount!\n");
3155 mutex_unlock(&btrfsic_mutex);
3156 return;
3157 }
3158
3159 /*
3160 * Don't care about keeping the lists' state up to date,
3161 * just free all memory that was allocated dynamically.
3162 * Free the blocks and the block_links.
3163 */
b69f2bef
GT
3164 list_for_each_entry_safe(b_all, tmp_all, &state->all_blocks_list,
3165 all_blocks_node) {
3166 struct btrfsic_block_link *l, *tmp;
5db02760 3167
b69f2bef
GT
3168 list_for_each_entry_safe(l, tmp, &b_all->ref_to_list,
3169 node_ref_to) {
5db02760
SB
3170 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
3171 btrfsic_print_rem_link(state, l);
3172
3173 l->ref_cnt--;
3174 if (0 == l->ref_cnt)
3175 btrfsic_block_link_free(l);
3176 }
3177
48235a68 3178 if (b_all->is_iodone || b_all->never_written)
5db02760
SB
3179 btrfsic_block_free(b_all);
3180 else
3181 printk(KERN_INFO "btrfs: attempt to free %c-block"
3182 " @%llu (%s/%llu/%d) on umount which is"
3183 " not yet iodone!\n",
3184 btrfsic_get_block_type(state, b_all),
c1c9ff7c
GU
3185 b_all->logical_bytenr, b_all->dev_state->name,
3186 b_all->dev_bytenr, b_all->mirror_num);
5db02760
SB
3187 }
3188
3189 mutex_unlock(&btrfsic_mutex);
3190
f749303b 3191 kvfree(state);
5db02760 3192}