Btrfs: verify csums on read
[linux-2.6-block.git] / fs / btrfs / disk-io.c
CommitLineData
e20d96d6
CM
1#include <linux/module.h>
2#include <linux/fs.h>
d98237b3 3#include <linux/blkdev.h>
87cbda5c
CM
4#include <linux/crypto.h>
5#include <linux/scatterlist.h>
eb60ceac
CM
6#include "ctree.h"
7#include "disk-io.h"
e089f05c 8#include "transaction.h"
eb60ceac 9
d98237b3 10
e20d96d6 11static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf)
eb60ceac 12{
e20d96d6 13 struct btrfs_node *node = btrfs_buffer_node(buf);
d98237b3 14 if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) {
9a8dd150 15 BUG();
d98237b3 16 }
e20d96d6 17 if (root->node && btrfs_header_parentid(&node->header) !=
df2ce34c 18 btrfs_header_parentid(btrfs_buffer_header(root->node))) {
7f5c1516 19 BUG();
df2ce34c 20 }
9a8dd150 21 return 0;
eb60ceac
CM
22}
23
d98237b3
CM
24struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr)
25{
26 struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
27 int blockbits = root->fs_info->sb->s_blocksize_bits;
28 unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
29 struct page *page;
30 struct buffer_head *bh;
31 struct buffer_head *head;
32 struct buffer_head *ret = NULL;
33
34 page = find_lock_page(mapping, index);
35 if (!page)
36 return NULL;
37
38 if (!page_has_buffers(page))
39 goto out_unlock;
40
41 head = page_buffers(page);
42 bh = head;
43 do {
44 if (buffer_mapped(bh) && bh->b_blocknr == blocknr) {
45 ret = bh;
46 get_bh(bh);
47 goto out_unlock;
48 }
49 bh = bh->b_this_page;
50 } while (bh != head);
51out_unlock:
52 unlock_page(page);
53 page_cache_release(page);
54 return ret;
55}
56
57struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root,
58 u64 blocknr)
59{
60 struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
61 int blockbits = root->fs_info->sb->s_blocksize_bits;
62 unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
63 struct page *page;
64 struct buffer_head *bh;
65 struct buffer_head *head;
66 struct buffer_head *ret = NULL;
67 u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits);
68 page = grab_cache_page(mapping, index);
69 if (!page)
70 return NULL;
71
d98237b3
CM
72 if (!page_has_buffers(page))
73 create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0);
74 head = page_buffers(page);
75 bh = head;
76 do {
77 if (!buffer_mapped(bh)) {
78 bh->b_bdev = root->fs_info->sb->s_bdev;
79 bh->b_blocknr = first_block;
80 set_buffer_mapped(bh);
81 }
82 if (bh->b_blocknr == blocknr) {
83 ret = bh;
84 get_bh(bh);
85 goto out_unlock;
86 }
87 bh = bh->b_this_page;
88 first_block++;
89 } while (bh != head);
90out_unlock:
91 unlock_page(page);
92 page_cache_release(page);
93 return ret;
94}
95
96static sector_t max_block(struct block_device *bdev)
97{
98 sector_t retval = ~((sector_t)0);
99 loff_t sz = i_size_read(bdev->bd_inode);
100
101 if (sz) {
102 unsigned int size = block_size(bdev);
103 unsigned int sizebits = blksize_bits(size);
104 retval = (sz >> sizebits);
105 }
106 return retval;
107}
108
109static int btree_get_block(struct inode *inode, sector_t iblock,
110 struct buffer_head *bh, int create)
111{
112 if (iblock >= max_block(inode->i_sb->s_bdev)) {
113 if (create)
114 return -EIO;
115
116 /*
117 * for reads, we're just trying to fill a partial page.
118 * return a hole, they will have to call get_block again
119 * before they can fill it, and they will get -EIO at that
120 * time
121 */
122 return 0;
123 }
124 bh->b_bdev = inode->i_sb->s_bdev;
125 bh->b_blocknr = iblock;
126 set_buffer_mapped(bh);
127 return 0;
128}
129
f254e52c
CM
130int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
131 char *result)
87cbda5c 132{
87cbda5c
CM
133 struct scatterlist sg;
134 struct crypto_hash *tfm = root->fs_info->hash_tfm;
135 struct hash_desc desc;
136 int ret;
87cbda5c
CM
137
138 desc.tfm = tfm;
139 desc.flags = 0;
f254e52c 140 sg_init_one(&sg, data, len);
87cbda5c 141 spin_lock(&root->fs_info->hash_lock);
f254e52c 142 ret = crypto_hash_digest(&desc, &sg, len, result);
87cbda5c
CM
143 spin_unlock(&root->fs_info->hash_lock);
144 if (ret) {
145 printk("sha256 digest failed\n");
146 }
f254e52c
CM
147 return ret;
148}
149static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh,
150 int verify)
151{
152 char result[BTRFS_CSUM_SIZE];
153 int ret;
154 struct btrfs_node *node;
155
156 ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE,
157 bh->b_size - BTRFS_CSUM_SIZE, result);
158 if (ret)
159 return ret;
87cbda5c 160 if (verify) {
f254e52c
CM
161 if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) {
162 printk("checksum verify failed on %lu\n",
163 bh->b_blocknr);
164 return 1;
165 }
166 } else {
167 node = btrfs_buffer_node(bh);
168 memcpy(&node->header.csum, result, BTRFS_CSUM_SIZE);
169 }
87cbda5c
CM
170 return 0;
171}
172
d98237b3 173static int btree_writepage(struct page *page, struct writeback_control *wbc)
ed2ff2cb 174{
87cbda5c
CM
175 struct buffer_head *bh;
176 struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb);
177 struct buffer_head *head;
178
179 if (!page_has_buffers(page)) {
180 create_empty_buffers(page, root->fs_info->sb->s_blocksize,
181 (1 << BH_Dirty)|(1 << BH_Uptodate));
182 }
183 head = page_buffers(page);
184 bh = head;
185 do {
186 if (buffer_dirty(bh))
187 csum_tree_block(root, bh, 0);
188 bh = bh->b_this_page;
189 } while (bh != head);
d98237b3 190 return block_write_full_page(page, btree_get_block, wbc);
ed2ff2cb
CM
191}
192
d98237b3 193static int btree_readpage(struct file * file, struct page * page)
eb60ceac 194{
d98237b3 195 return block_read_full_page(page, btree_get_block);
eb60ceac
CM
196}
197
d98237b3
CM
198static struct address_space_operations btree_aops = {
199 .readpage = btree_readpage,
200 .writepage = btree_writepage,
201 .sync_page = block_sync_page,
202};
203
e20d96d6 204struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr)
eb60ceac 205{
d98237b3 206 struct buffer_head *bh = NULL;
eb60ceac 207
d98237b3
CM
208 bh = btrfs_find_create_tree_block(root, blocknr);
209 if (!bh)
210 return bh;
211 lock_buffer(bh);
212 if (!buffer_uptodate(bh)) {
213 get_bh(bh);
214 bh->b_end_io = end_buffer_read_sync;
215 submit_bh(READ, bh);
216 wait_on_buffer(bh);
217 if (!buffer_uptodate(bh))
218 goto fail;
87cbda5c 219 csum_tree_block(root, bh, 1);
d98237b3
CM
220 } else {
221 unlock_buffer(bh);
222 }
223 if (check_tree_block(root, bh))
cfaa7295 224 BUG();
d98237b3
CM
225 return bh;
226fail:
227 brelse(bh);
228 return NULL;
229
eb60ceac
CM
230}
231
/*
 * Mark the tree block in @buf dirty.  @trans and @root are not used.
 * Always returns 0.
 */
int dirty_tree_block(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root, struct buffer_head *buf)
{
	mark_buffer_dirty(buf);

	return 0;
}
238
/*
 * Clear the dirty bit of the tree block in @buf.  @trans and @root are not
 * used.  Always returns 0.
 */
int clean_tree_block(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root, struct buffer_head *buf)
{
	clear_buffer_dirty(buf);

	return 0;
}
245
123abc88 246static int __setup_root(struct btrfs_super_block *super,
9f5fae2f
CM
247 struct btrfs_root *root,
248 struct btrfs_fs_info *fs_info,
e20d96d6 249 u64 objectid)
d97e63b6 250{
cfaa7295 251 root->node = NULL;
a28ec197 252 root->commit_root = NULL;
123abc88
CM
253 root->blocksize = btrfs_super_blocksize(super);
254 root->ref_cows = 0;
9f5fae2f 255 root->fs_info = fs_info;
3768f368
CM
256 memset(&root->root_key, 0, sizeof(root->root_key));
257 memset(&root->root_item, 0, sizeof(root->root_item));
258 return 0;
259}
260
123abc88 261static int find_and_setup_root(struct btrfs_super_block *super,
9f5fae2f
CM
262 struct btrfs_root *tree_root,
263 struct btrfs_fs_info *fs_info,
264 u64 objectid,
e20d96d6 265 struct btrfs_root *root)
3768f368
CM
266{
267 int ret;
268
e20d96d6 269 __setup_root(super, root, fs_info, objectid);
3768f368
CM
270 ret = btrfs_find_last_root(tree_root, objectid,
271 &root->root_item, &root->root_key);
272 BUG_ON(ret);
273
274 root->node = read_tree_block(root,
275 btrfs_root_blocknr(&root->root_item));
3768f368 276 BUG_ON(!root->node);
d97e63b6
CM
277 return 0;
278}
279
e20d96d6
CM
280struct btrfs_root *open_ctree(struct super_block *sb,
281 struct buffer_head *sb_buffer,
282 struct btrfs_super_block *disk_super)
2e635a27 283{
e20d96d6
CM
284 struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root),
285 GFP_NOFS);
286 struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
287 GFP_NOFS);
288 struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
289 GFP_NOFS);
290 struct btrfs_root *inode_root = kmalloc(sizeof(struct btrfs_root),
291 GFP_NOFS);
292 struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
293 GFP_NOFS);
eb60ceac
CM
294 int ret;
295
87cbda5c 296 if (!btrfs_super_root(disk_super)) {
e20d96d6 297 return NULL;
87cbda5c 298 }
8ef97622
CM
299 init_bit_radix(&fs_info->pinned_radix);
300 init_bit_radix(&fs_info->pending_del_radix);
d98237b3 301 sb_set_blocksize(sb, sb_buffer->b_size);
9f5fae2f
CM
302 fs_info->running_transaction = NULL;
303 fs_info->fs_root = root;
304 fs_info->tree_root = tree_root;
305 fs_info->extent_root = extent_root;
306 fs_info->inode_root = inode_root;
307 fs_info->last_inode_alloc = 0;
308 fs_info->last_inode_alloc_dirid = 0;
e20d96d6 309 fs_info->disk_super = disk_super;
e20d96d6 310 fs_info->sb = sb;
d98237b3
CM
311 fs_info->btree_inode = new_inode(sb);
312 fs_info->btree_inode->i_ino = 1;
313 fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
314 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
315 mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
87cbda5c 316 fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
30ae8467
CM
317 spin_lock_init(&fs_info->hash_lock);
318
319 if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) {
87cbda5c
CM
320 printk("failed to allocate sha256 hash\n");
321 return NULL;
322 }
d98237b3 323
79154b1b 324 mutex_init(&fs_info->trans_mutex);
d561c025 325 mutex_init(&fs_info->fs_mutex);
9f5fae2f
CM
326 memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert));
327 memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));
3768f368 328
e20d96d6 329 __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
d98237b3
CM
330
331 fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr);
332
87cbda5c
CM
333 if (!fs_info->sb_buffer) {
334printk("failed2\n");
d98237b3 335 return NULL;
87cbda5c 336 }
d98237b3
CM
337 brelse(sb_buffer);
338 sb_buffer = NULL;
339 disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data;
340 fs_info->disk_super = disk_super;
341
e20d96d6
CM
342 tree_root->node = read_tree_block(tree_root,
343 btrfs_super_root(disk_super));
3768f368
CM
344 BUG_ON(!tree_root->node);
345
e20d96d6
CM
346 ret = find_and_setup_root(disk_super, tree_root, fs_info,
347 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
3768f368
CM
348 BUG_ON(ret);
349
e20d96d6
CM
350 ret = find_and_setup_root(disk_super, tree_root, fs_info,
351 BTRFS_INODE_MAP_OBJECTID, inode_root);
9f5fae2f
CM
352 BUG_ON(ret);
353
e20d96d6
CM
354 ret = find_and_setup_root(disk_super, tree_root, fs_info,
355 BTRFS_FS_TREE_OBJECTID, root);
3768f368 356 BUG_ON(ret);
a28ec197 357 root->commit_root = root->node;
e20d96d6 358 get_bh(root->node);
3768f368 359 root->ref_cows = 1;
293ffd5f 360 root->fs_info->generation = root->root_key.offset + 1;
eb60ceac
CM
361 return root;
362}
363
e089f05c 364int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
79154b1b 365 *root)
eb60ceac 366{
d5719762
CM
367 struct buffer_head *bh = root->fs_info->sb_buffer;
368 btrfs_set_super_root(root->fs_info->disk_super,
369 root->fs_info->tree_root->node->b_blocknr);
370 lock_buffer(bh);
371 clear_buffer_dirty(bh);
87cbda5c 372 csum_tree_block(root, bh, 0);
d5719762
CM
373 bh->b_end_io = end_buffer_write_sync;
374 get_bh(bh);
375 submit_bh(WRITE, bh);
376 wait_on_buffer(bh);
377 if (!buffer_uptodate(bh)) {
378 WARN_ON(1);
379 return -EIO;
cfaa7295
CM
380 }
381 return 0;
382}
383
e20d96d6 384int close_ctree(struct btrfs_root *root)
cfaa7295 385{
3768f368 386 int ret;
e089f05c
CM
387 struct btrfs_trans_handle *trans;
388
79154b1b
CM
389 trans = btrfs_start_transaction(root, 1);
390 btrfs_commit_transaction(trans, root);
391 /* run commit again to drop the original snapshot */
392 trans = btrfs_start_transaction(root, 1);
393 btrfs_commit_transaction(trans, root);
394 ret = btrfs_write_and_wait_transaction(NULL, root);
3768f368 395 BUG_ON(ret);
79154b1b 396 write_ctree_super(NULL, root);
ed2ff2cb 397
cfaa7295 398 if (root->node)
234b63a0 399 btrfs_block_release(root, root->node);
9f5fae2f
CM
400 if (root->fs_info->extent_root->node)
401 btrfs_block_release(root->fs_info->extent_root,
402 root->fs_info->extent_root->node);
403 if (root->fs_info->inode_root->node)
404 btrfs_block_release(root->fs_info->inode_root,
405 root->fs_info->inode_root->node);
406 if (root->fs_info->tree_root->node)
407 btrfs_block_release(root->fs_info->tree_root,
408 root->fs_info->tree_root->node);
234b63a0 409 btrfs_block_release(root, root->commit_root);
e20d96d6 410 btrfs_block_release(root, root->fs_info->sb_buffer);
87cbda5c 411 crypto_free_hash(root->fs_info->hash_tfm);
30ae8467 412 truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0);
d98237b3 413 iput(root->fs_info->btree_inode);
e20d96d6
CM
414 kfree(root->fs_info->extent_root);
415 kfree(root->fs_info->inode_root);
416 kfree(root->fs_info->tree_root);
417 kfree(root->fs_info);
418 kfree(root);
eb60ceac
CM
419 return 0;
420}
421
/*
 * Drop a reference on tree block buffer @buf by passing it to brelse().
 * @root is not used.
 */
void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf)
{
	brelse(buf);
}
426