Btrfs: verify csums on read
fs/btrfs/disk-io.c (linux-2.6-block.git)
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
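
/*
 * Sanity check a tree block that was just read: the block number stored in
 * the header must match where the buffer actually lives on disk, and the
 * parent id must match the root we expect the block to belong to.
 */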
static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf)
{
	struct btrfs_node *node = btrfs_buffer_node(buf);

	if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) {
		BUG();
	}
	if (root->node && btrfs_header_parentid(&node->header) !=
	    btrfs_header_parentid(btrfs_buffer_header(root->node))) {
		BUG();
	}
	return 0;
}
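
/*
 * Look up a tree block in the btree inode's page cache without reading it
 * from disk.  Returns the buffer_head with an extra reference held, or
 * NULL if the block is not cached.
 */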
struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr)
{
	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
	int blockbits = root->fs_info->sb->s_blocksize_bits;
	unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
	struct page *page;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct buffer_head *ret = NULL;

	page = find_lock_page(mapping, index);
	if (!page)
		return NULL;

	if (!page_has_buffers(page))
		goto out_unlock;

	head = page_buffers(page);
	bh = head;
	do {
		if (buffer_mapped(bh) && bh->b_blocknr == blocknr) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		bh = bh->b_this_page;
	} while (bh != head);
out_unlock:
	unlock_page(page);
	page_cache_release(page);
	return ret;
}
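
/*
 * Find a tree block in the page cache, creating and mapping empty buffers
 * on the page as needed.  Returns the buffer_head with an extra reference
 * held, or NULL on failure.
 */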
struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root,
						 u64 blocknr)
{
	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
	int blockbits = root->fs_info->sb->s_blocksize_bits;
	unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
	struct page *page;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct buffer_head *ret = NULL;
	u64 first_block = (u64)index << (PAGE_CACHE_SHIFT - blockbits);

	page = grab_cache_page(mapping, index);
	if (!page)
		return NULL;

	if (!page_has_buffers(page))
		create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0);
	head = page_buffers(page);
	bh = head;
	do {
		if (!buffer_mapped(bh)) {
			bh->b_bdev = root->fs_info->sb->s_bdev;
			bh->b_blocknr = first_block;
			set_buffer_mapped(bh);
		}
		if (bh->b_blocknr == blocknr) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		bh = bh->b_this_page;
		first_block++;
	} while (bh != head);
out_unlock:
	unlock_page(page);
	page_cache_release(page);
	return ret;
}
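
/*
 * Number of blocks on the backing device, used to bounds check btree
 * block numbers.
 */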
static sector_t max_block(struct block_device *bdev)
{
	sector_t retval = ~((sector_t)0);
	loff_t sz = i_size_read(bdev->bd_inode);

	if (sz) {
		unsigned int size = block_size(bdev);
		unsigned int sizebits = blksize_bits(size);
		retval = (sz >> sizebits);
	}
	return retval;
}
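
/*
 * get_block callback for the btree inode.  Tree blocks map 1:1 onto device
 * blocks, so this only bounds checks against the device size and fills in
 * the mapping.
 */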
static int btree_get_block(struct inode *inode, sector_t iblock,
			   struct buffer_head *bh, int create)
{
	if (iblock >= max_block(inode->i_sb->s_bdev)) {
		if (create)
			return -EIO;

		/*
		 * for reads, we're just trying to fill a partial page.
		 * return a hole, they will have to call get_block again
		 * before they can fill it, and they will get -EIO at that
		 * time
		 */
		return 0;
	}
	bh->b_bdev = inode->i_sb->s_bdev;
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}
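
/*
 * Checksum a chunk of data with the filesystem-wide sha256 transform.  The
 * transform is shared, so it is serialized with fs_info->hash_lock.
 */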
int btrfs_csum_data(struct btrfs_root *root, char *data, size_t len,
		    char *result)
{
	struct scatterlist sg;
	struct crypto_hash *tfm = root->fs_info->hash_tfm;
	struct hash_desc desc;
	int ret;

	desc.tfm = tfm;
	desc.flags = 0;
	sg_init_one(&sg, data, len);
	spin_lock(&root->fs_info->hash_lock);
	ret = crypto_hash_digest(&desc, &sg, len, result);
	spin_unlock(&root->fs_info->hash_lock);
	if (ret) {
		printk("sha256 digest failed\n");
	}
	return ret;
}
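
/*
 * Checksum the contents of a tree block.  With verify == 0 the result is
 * stored in the block header; with verify != 0 it is compared against the
 * stored csum and a mismatch is reported.
 */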
static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh,
			   int verify)
{
	char result[BTRFS_CSUM_SIZE];
	int ret;
	struct btrfs_node *node;

	ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE,
			      bh->b_size - BTRFS_CSUM_SIZE, result);
	if (ret)
		return ret;
	if (verify) {
		if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) {
			printk("checksum verify failed on %llu\n",
			       (unsigned long long)bh->b_blocknr);
			return 1;
		}
	} else {
		node = btrfs_buffer_node(bh);
		memcpy(&node->header.csum, result, BTRFS_CSUM_SIZE);
	}
	return 0;
}
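
/*
 * writepage for the btree inode: recompute the csum of every dirty buffer
 * on the page before handing it to the generic buffer write path.
 */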
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct buffer_head *bh;
	struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb);
	struct buffer_head *head;

	if (!page_has_buffers(page)) {
		create_empty_buffers(page, root->fs_info->sb->s_blocksize,
				     (1 << BH_Dirty) | (1 << BH_Uptodate));
	}
	head = page_buffers(page);
	bh = head;
	do {
		if (buffer_dirty(bh))
			csum_tree_block(root, bh, 0);
		bh = bh->b_this_page;
	} while (bh != head);
	return block_write_full_page(page, btree_get_block, wbc);
}
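
/* readpage for the btree inode: plain buffered reads of tree blocks */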
static int btree_readpage(struct file *file, struct page *page)
{
	return block_read_full_page(page, btree_get_block);
}

static struct address_space_operations btree_aops = {
	.readpage = btree_readpage,
	.writepage = btree_writepage,
	.sync_page = block_sync_page,
};
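
/*
 * Read a tree block, issuing the I/O if it is not already up to date.
 * Freshly read blocks have their csum verified (a mismatch is only
 * reported, not treated as fatal here) and their header sanity checked.
 */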
struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr)
{
	struct buffer_head *bh = NULL;

	bh = btrfs_find_create_tree_block(root, blocknr);
	if (!bh)
		return bh;
	lock_buffer(bh);
	if (!buffer_uptodate(bh)) {
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(READ, bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			goto fail;
		csum_tree_block(root, bh, 1);
	} else {
		unlock_buffer(bh);
	}
	if (check_tree_block(root, bh))
		BUG();
	return bh;
fail:
	brelse(bh);
	return NULL;
}
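
/*
 * Mark a tree block dirty so it is csummed and written back through
 * btree_writepage().
 */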
int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		     struct buffer_head *buf)
{
	mark_buffer_dirty(buf);
	return 0;
}
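
/* Undo dirty_tree_block(): drop the buffer's dirty bit without writing it. */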
int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		     struct buffer_head *buf)
{
	clear_buffer_dirty(buf);
	return 0;
}
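
/*
 * Initialize the in-memory fields of a btrfs_root that are common to
 * every tree.
 */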
static int __setup_root(struct btrfs_super_block *super,
			struct btrfs_root *root,
			struct btrfs_fs_info *fs_info,
			u64 objectid)
{
	root->node = NULL;
	root->commit_root = NULL;
	root->blocksize = btrfs_super_blocksize(super);
	root->ref_cows = 0;
	root->fs_info = fs_info;
	memset(&root->root_key, 0, sizeof(root->root_key));
	memset(&root->root_item, 0, sizeof(root->root_item));
	return 0;
}
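
/*
 * Set up a root and load its current root item and top block from the
 * tree of tree roots.
 */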
static int find_and_setup_root(struct btrfs_super_block *super,
			       struct btrfs_root *tree_root,
			       struct btrfs_fs_info *fs_info,
			       u64 objectid,
			       struct btrfs_root *root)
{
	int ret;

	__setup_root(super, root, fs_info, objectid);
	ret = btrfs_find_last_root(tree_root, objectid,
				   &root->root_item, &root->root_key);
	BUG_ON(ret);

	root->node = read_tree_block(root,
				     btrfs_root_blocknr(&root->root_item));
	BUG_ON(!root->node);
	return 0;
}
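
/*
 * Mount-time setup: allocate the fs_info and the core roots, set up the
 * btree inode and the sha256 transform, re-read the super block through
 * the btree address space, and load the tree, extent, inode map and fs
 * roots.
 */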
struct btrfs_root *open_ctree(struct super_block *sb,
			      struct buffer_head *sb_buffer,
			      struct btrfs_super_block *disk_super)
{
	struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root),
					  GFP_NOFS);
	struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
						 GFP_NOFS);
	struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
					       GFP_NOFS);
	struct btrfs_root *inode_root = kmalloc(sizeof(struct btrfs_root),
						GFP_NOFS);
	struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
						GFP_NOFS);
	int ret;

	if (!btrfs_super_root(disk_super)) {
		return NULL;
	}
	init_bit_radix(&fs_info->pinned_radix);
	init_bit_radix(&fs_info->pending_del_radix);
	sb_set_blocksize(sb, sb_buffer->b_size);
	fs_info->running_transaction = NULL;
	fs_info->fs_root = root;
	fs_info->tree_root = tree_root;
	fs_info->extent_root = extent_root;
	fs_info->inode_root = inode_root;
	fs_info->last_inode_alloc = 0;
	fs_info->last_inode_alloc_dirid = 0;
	fs_info->disk_super = disk_super;
	fs_info->sb = sb;
	fs_info->btree_inode = new_inode(sb);
	fs_info->btree_inode->i_ino = 1;
	fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
	fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
	spin_lock_init(&fs_info->hash_lock);

	if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) {
		printk("failed to allocate sha256 hash\n");
		return NULL;
	}

	mutex_init(&fs_info->trans_mutex);
	mutex_init(&fs_info->fs_mutex);
	memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert));
	memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));

	__setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID);

	fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr);

	if (!fs_info->sb_buffer) {
		printk("failed to read the super block\n");
		return NULL;
	}
	brelse(sb_buffer);
	sb_buffer = NULL;
	disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data;
	fs_info->disk_super = disk_super;

	tree_root->node = read_tree_block(tree_root,
					  btrfs_super_root(disk_super));
	BUG_ON(!tree_root->node);

	ret = find_and_setup_root(disk_super, tree_root, fs_info,
				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
	BUG_ON(ret);

	ret = find_and_setup_root(disk_super, tree_root, fs_info,
				  BTRFS_INODE_MAP_OBJECTID, inode_root);
	BUG_ON(ret);

	ret = find_and_setup_root(disk_super, tree_root, fs_info,
				  BTRFS_FS_TREE_OBJECTID, root);
	BUG_ON(ret);
	root->commit_root = root->node;
	get_bh(root->node);
	root->ref_cows = 1;
	root->fs_info->generation = root->root_key.offset + 1;
	return root;
}
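
/*
 * Write the super block synchronously: point it at the current tree root,
 * recompute its csum and submit the buffer directly.
 */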
int write_ctree_super(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root)
{
	struct buffer_head *bh = root->fs_info->sb_buffer;

	btrfs_set_super_root(root->fs_info->disk_super,
			     root->fs_info->tree_root->node->b_blocknr);
	lock_buffer(bh);
	clear_buffer_dirty(bh);
	csum_tree_block(root, bh, 0);
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(WRITE, bh);
	wait_on_buffer(bh);
	if (!buffer_uptodate(bh)) {
		WARN_ON(1);
		return -EIO;
	}
	return 0;
}
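
/*
 * Unmount: commit twice (the second commit drops the original snapshot),
 * write the super block, then release every cached root block along with
 * the fs_info structures.
 */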
int close_ctree(struct btrfs_root *root)
{
	int ret;
	struct btrfs_trans_handle *trans;

	trans = btrfs_start_transaction(root, 1);
	btrfs_commit_transaction(trans, root);
	/* run commit again to drop the original snapshot */
	trans = btrfs_start_transaction(root, 1);
	btrfs_commit_transaction(trans, root);
	ret = btrfs_write_and_wait_transaction(NULL, root);
	BUG_ON(ret);
	write_ctree_super(NULL, root);

	if (root->node)
		btrfs_block_release(root, root->node);
	if (root->fs_info->extent_root->node)
		btrfs_block_release(root->fs_info->extent_root,
				    root->fs_info->extent_root->node);
	if (root->fs_info->inode_root->node)
		btrfs_block_release(root->fs_info->inode_root,
				    root->fs_info->inode_root->node);
	if (root->fs_info->tree_root->node)
		btrfs_block_release(root->fs_info->tree_root,
				    root->fs_info->tree_root->node);
	btrfs_block_release(root, root->commit_root);
	btrfs_block_release(root, root->fs_info->sb_buffer);
	crypto_free_hash(root->fs_info->hash_tfm);
	truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0);
	iput(root->fs_info->btree_inode);
	kfree(root->fs_info->extent_root);
	kfree(root->fs_info->inode_root);
	kfree(root->fs_info->tree_root);
	kfree(root->fs_info);
	kfree(root);
	return 0;
}
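
/* Drop a reference on a tree block buffer. */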
void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf)
{
	brelse(buf);
}