Commit | Line | Data |
---|---|---|
c1d7c514 | 1 | // SPDX-License-Identifier: GPL-2.0 |
6cbd5570 CM |
2 | /* |
3 | * Copyright (C) 2007 Oracle. All rights reserved. | |
6cbd5570 CM |
4 | */ |
5 | ||
065631f6 | 6 | #include <linux/bio.h> |
5a0e3ad6 | 7 | #include <linux/slab.h> |
065631f6 CM |
8 | #include <linux/pagemap.h> |
9 | #include <linux/highmem.h> | |
a3d46aea | 10 | #include <linux/sched/mm.h> |
d5178578 | 11 | #include <crypto/hash.h> |
cea62800 | 12 | #include "misc.h" |
1e1d2701 | 13 | #include "ctree.h" |
dee26a9f | 14 | #include "disk-io.h" |
9f5fae2f | 15 | #include "transaction.h" |
facc8a22 | 16 | #include "volumes.h" |
1de037a4 | 17 | #include "print-tree.h" |
ebb8765b | 18 | #include "compression.h" |
1e1d2701 | 19 | |
/*
 * Upper bound on the number of checksums of @size bytes each that fit into a
 * single item: the leaf data size reported by BTRFS_LEAF_DATA_SIZE(@r), minus
 * two item headers, divided by the checksum size, minus one.
 *
 * @size is parenthesized so that an expression argument (e.g. "a + b") is
 * used as a whole divisor instead of being split by operator precedence.
 */
#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
				   sizeof(struct btrfs_item) * 2) / \
				  (size)) - 1))

/* Same bound as __MAX_CSUM_ITEMS(), capped at PAGE_SIZE and compared as u32. */
#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
				       PAGE_SIZE))
7ca4be45 | 26 | |
41a2ee75 | 27 | /** |
ca4207ae NB |
28 | * Set inode's size according to filesystem options |
29 | * | |
30 | * @inode: inode we want to update the disk_i_size for | |
31 | * @new_i_size: i_size we want to set to, 0 if we use i_size | |
41a2ee75 JB |
32 | * |
33 | * With NO_HOLES set this simply sets the disk_is_size to whatever i_size_read() | |
34 | * returns as it is perfectly fine with a file that has holes without hole file | |
35 | * extent items. | |
36 | * | |
37 | * However without NO_HOLES we need to only return the area that is contiguous | |
38 | * from the 0 offset of the file. Otherwise we could end up adjust i_size up | |
39 | * to an extent that has a gap in between. | |
40 | * | |
41 | * Finally new_i_size should only be set in the case of truncate where we're not | |
42 | * ready to use i_size_read() as the limiter yet. | |
43 | */ | |
76aea537 | 44 | void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size) |
41a2ee75 | 45 | { |
76aea537 | 46 | struct btrfs_fs_info *fs_info = inode->root->fs_info; |
41a2ee75 JB |
47 | u64 start, end, i_size; |
48 | int ret; | |
49 | ||
76aea537 | 50 | i_size = new_i_size ?: i_size_read(&inode->vfs_inode); |
41a2ee75 | 51 | if (btrfs_fs_incompat(fs_info, NO_HOLES)) { |
76aea537 | 52 | inode->disk_i_size = i_size; |
41a2ee75 JB |
53 | return; |
54 | } | |
55 | ||
76aea537 NB |
56 | spin_lock(&inode->lock); |
57 | ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start, | |
58 | &end, EXTENT_DIRTY); | |
41a2ee75 JB |
59 | if (!ret && start == 0) |
60 | i_size = min(i_size, end + 1); | |
61 | else | |
62 | i_size = 0; | |
76aea537 NB |
63 | inode->disk_i_size = i_size; |
64 | spin_unlock(&inode->lock); | |
41a2ee75 JB |
65 | } |
66 | ||
67 | /** | |
ca4207ae NB |
68 | * Mark range within a file as having a new extent inserted |
69 | * | |
70 | * @inode: inode being modified | |
71 | * @start: start file offset of the file extent we've inserted | |
72 | * @len: logical length of the file extent item | |
41a2ee75 JB |
73 | * |
74 | * Call when we are inserting a new file extent where there was none before. | |
75 | * Does not need to call this in the case where we're replacing an existing file | |
76 | * extent, however if not sure it's fine to call this multiple times. | |
77 | * | |
78 | * The start and len must match the file extent item, so thus must be sectorsize | |
79 | * aligned. | |
80 | */ | |
81 | int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start, | |
82 | u64 len) | |
83 | { | |
84 | if (len == 0) | |
85 | return 0; | |
86 | ||
87 | ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize)); | |
88 | ||
89 | if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES)) | |
90 | return 0; | |
91 | return set_extent_bits(&inode->file_extent_tree, start, start + len - 1, | |
92 | EXTENT_DIRTY); | |
93 | } | |
94 | ||
95 | /** | |
ca4207ae NB |
96 | * Marks an inode range as not having a backing extent |
97 | * | |
98 | * @inode: inode being modified | |
99 | * @start: start file offset of the file extent we've inserted | |
100 | * @len: logical length of the file extent item | |
41a2ee75 JB |
101 | * |
102 | * Called when we drop a file extent, for example when we truncate. Doesn't | |
103 | * need to be called for cases where we're replacing a file extent, like when | |
104 | * we've COWed a file extent. | |
105 | * | |
106 | * The start and len must match the file extent item, so thus must be sectorsize | |
107 | * aligned. | |
108 | */ | |
109 | int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start, | |
110 | u64 len) | |
111 | { | |
112 | if (len == 0) | |
113 | return 0; | |
114 | ||
115 | ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) || | |
116 | len == (u64)-1); | |
117 | ||
118 | if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES)) | |
119 | return 0; | |
120 | return clear_extent_bit(&inode->file_extent_tree, start, | |
121 | start + len - 1, EXTENT_DIRTY, 0, 0, NULL); | |
122 | } | |
123 | ||
1e25a2e3 JT |
124 | static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info, |
125 | u16 csum_size) | |
126 | { | |
127 | u32 ncsums = (PAGE_SIZE - sizeof(struct btrfs_ordered_sum)) / csum_size; | |
128 | ||
129 | return ncsums * fs_info->sectorsize; | |
130 | } | |
07d400a6 | 131 | |
b18c6685 | 132 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
f2eb0a24 SW |
133 | struct btrfs_root *root, |
134 | u64 objectid, u64 pos, | |
135 | u64 disk_offset, u64 disk_num_bytes, | |
c8b97818 CM |
136 | u64 num_bytes, u64 offset, u64 ram_bytes, |
137 | u8 compression, u8 encryption, u16 other_encoding) | |
9f5fae2f | 138 | { |
dee26a9f CM |
139 | int ret = 0; |
140 | struct btrfs_file_extent_item *item; | |
141 | struct btrfs_key file_key; | |
5caf2a00 | 142 | struct btrfs_path *path; |
5f39d397 | 143 | struct extent_buffer *leaf; |
dee26a9f | 144 | |
5caf2a00 | 145 | path = btrfs_alloc_path(); |
db5b493a TI |
146 | if (!path) |
147 | return -ENOMEM; | |
dee26a9f | 148 | file_key.objectid = objectid; |
b18c6685 | 149 | file_key.offset = pos; |
962a298f | 150 | file_key.type = BTRFS_EXTENT_DATA_KEY; |
dee26a9f | 151 | |
5caf2a00 | 152 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, |
dee26a9f | 153 | sizeof(*item)); |
54aa1f4d CM |
154 | if (ret < 0) |
155 | goto out; | |
79787eaa | 156 | BUG_ON(ret); /* Can't happen */ |
5f39d397 CM |
157 | leaf = path->nodes[0]; |
158 | item = btrfs_item_ptr(leaf, path->slots[0], | |
dee26a9f | 159 | struct btrfs_file_extent_item); |
f2eb0a24 | 160 | btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset); |
db94535d | 161 | btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); |
f2eb0a24 | 162 | btrfs_set_file_extent_offset(leaf, item, offset); |
db94535d | 163 | btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); |
c8b97818 | 164 | btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes); |
5f39d397 CM |
165 | btrfs_set_file_extent_generation(leaf, item, trans->transid); |
166 | btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); | |
c8b97818 CM |
167 | btrfs_set_file_extent_compression(leaf, item, compression); |
168 | btrfs_set_file_extent_encryption(leaf, item, encryption); | |
169 | btrfs_set_file_extent_other_encoding(leaf, item, other_encoding); | |
170 | ||
5f39d397 | 171 | btrfs_mark_buffer_dirty(leaf); |
54aa1f4d | 172 | out: |
5caf2a00 | 173 | btrfs_free_path(path); |
54aa1f4d | 174 | return ret; |
9f5fae2f | 175 | } |
dee26a9f | 176 | |
48a3b636 ES |
177 | static struct btrfs_csum_item * |
178 | btrfs_lookup_csum(struct btrfs_trans_handle *trans, | |
179 | struct btrfs_root *root, | |
180 | struct btrfs_path *path, | |
181 | u64 bytenr, int cow) | |
6567e837 | 182 | { |
0b246afa | 183 | struct btrfs_fs_info *fs_info = root->fs_info; |
6567e837 CM |
184 | int ret; |
185 | struct btrfs_key file_key; | |
186 | struct btrfs_key found_key; | |
187 | struct btrfs_csum_item *item; | |
5f39d397 | 188 | struct extent_buffer *leaf; |
6567e837 | 189 | u64 csum_offset = 0; |
223486c2 | 190 | const u32 csum_size = fs_info->csum_size; |
a429e513 | 191 | int csums_in_item; |
6567e837 | 192 | |
d20f7043 CM |
193 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
194 | file_key.offset = bytenr; | |
962a298f | 195 | file_key.type = BTRFS_EXTENT_CSUM_KEY; |
b18c6685 | 196 | ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); |
6567e837 CM |
197 | if (ret < 0) |
198 | goto fail; | |
5f39d397 | 199 | leaf = path->nodes[0]; |
6567e837 CM |
200 | if (ret > 0) { |
201 | ret = 1; | |
70b2befd | 202 | if (path->slots[0] == 0) |
6567e837 CM |
203 | goto fail; |
204 | path->slots[0]--; | |
5f39d397 | 205 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
962a298f | 206 | if (found_key.type != BTRFS_EXTENT_CSUM_KEY) |
6567e837 | 207 | goto fail; |
d20f7043 CM |
208 | |
209 | csum_offset = (bytenr - found_key.offset) >> | |
265fdfa6 | 210 | fs_info->sectorsize_bits; |
5f39d397 | 211 | csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); |
607d432d | 212 | csums_in_item /= csum_size; |
a429e513 | 213 | |
82d130ff | 214 | if (csum_offset == csums_in_item) { |
a429e513 | 215 | ret = -EFBIG; |
6567e837 | 216 | goto fail; |
82d130ff MX |
217 | } else if (csum_offset > csums_in_item) { |
218 | goto fail; | |
6567e837 CM |
219 | } |
220 | } | |
221 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | |
509659cd | 222 | item = (struct btrfs_csum_item *)((unsigned char *)item + |
607d432d | 223 | csum_offset * csum_size); |
6567e837 CM |
224 | return item; |
225 | fail: | |
226 | if (ret > 0) | |
b18c6685 | 227 | ret = -ENOENT; |
6567e837 CM |
228 | return ERR_PTR(ret); |
229 | } | |
230 | ||
dee26a9f CM |
231 | int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, |
232 | struct btrfs_root *root, | |
233 | struct btrfs_path *path, u64 objectid, | |
9773a788 | 234 | u64 offset, int mod) |
dee26a9f CM |
235 | { |
236 | int ret; | |
237 | struct btrfs_key file_key; | |
238 | int ins_len = mod < 0 ? -1 : 0; | |
239 | int cow = mod != 0; | |
240 | ||
241 | file_key.objectid = objectid; | |
70b2befd | 242 | file_key.offset = offset; |
962a298f | 243 | file_key.type = BTRFS_EXTENT_DATA_KEY; |
dee26a9f CM |
244 | ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); |
245 | return ret; | |
246 | } | |
f254e52c | 247 | |
6275193e QW |
248 | /* |
249 | * Find checksums for logical bytenr range [disk_bytenr, disk_bytenr + len) and | |
250 | * estore the result to @dst. | |
251 | * | |
252 | * Return >0 for the number of sectors we found. | |
253 | * Return 0 for the range [disk_bytenr, disk_bytenr + sectorsize) has no csum | |
254 | * for it. Caller may want to try next sector until one range is hit. | |
255 | * Return <0 for fatal error. | |
256 | */ | |
257 | static int search_csum_tree(struct btrfs_fs_info *fs_info, | |
258 | struct btrfs_path *path, u64 disk_bytenr, | |
259 | u64 len, u8 *dst) | |
260 | { | |
261 | struct btrfs_csum_item *item = NULL; | |
262 | struct btrfs_key key; | |
263 | const u32 sectorsize = fs_info->sectorsize; | |
264 | const u32 csum_size = fs_info->csum_size; | |
265 | u32 itemsize; | |
266 | int ret; | |
267 | u64 csum_start; | |
268 | u64 csum_len; | |
269 | ||
270 | ASSERT(IS_ALIGNED(disk_bytenr, sectorsize) && | |
271 | IS_ALIGNED(len, sectorsize)); | |
272 | ||
273 | /* Check if the current csum item covers disk_bytenr */ | |
274 | if (path->nodes[0]) { | |
275 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | |
276 | struct btrfs_csum_item); | |
277 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | |
278 | itemsize = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | |
279 | ||
280 | csum_start = key.offset; | |
281 | csum_len = (itemsize / csum_size) * sectorsize; | |
282 | ||
283 | if (in_range(disk_bytenr, csum_start, csum_len)) | |
284 | goto found; | |
285 | } | |
286 | ||
287 | /* Current item doesn't contain the desired range, search again */ | |
288 | btrfs_release_path(path); | |
289 | item = btrfs_lookup_csum(NULL, fs_info->csum_root, path, disk_bytenr, 0); | |
290 | if (IS_ERR(item)) { | |
291 | ret = PTR_ERR(item); | |
292 | goto out; | |
293 | } | |
294 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | |
295 | itemsize = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | |
296 | ||
297 | csum_start = key.offset; | |
298 | csum_len = (itemsize / csum_size) * sectorsize; | |
299 | ASSERT(in_range(disk_bytenr, csum_start, csum_len)); | |
300 | ||
301 | found: | |
302 | ret = (min(csum_start + csum_len, disk_bytenr + len) - | |
303 | disk_bytenr) >> fs_info->sectorsize_bits; | |
304 | read_extent_buffer(path->nodes[0], dst, (unsigned long)item, | |
305 | ret * csum_size); | |
306 | out: | |
307 | if (ret == -ENOENT) | |
308 | ret = 0; | |
309 | return ret; | |
310 | } | |
311 | ||
312 | /* | |
313 | * Locate the file_offset of @cur_disk_bytenr of a @bio. | |
314 | * | |
315 | * Bio of btrfs represents read range of | |
316 | * [bi_sector << 9, bi_sector << 9 + bi_size). | |
317 | * Knowing this, we can iterate through each bvec to locate the page belong to | |
318 | * @cur_disk_bytenr and get the file offset. | |
319 | * | |
320 | * @inode is used to determine if the bvec page really belongs to @inode. | |
321 | * | |
322 | * Return 0 if we can't find the file offset | |
323 | * Return >0 if we find the file offset and restore it to @file_offset_ret | |
324 | */ | |
325 | static int search_file_offset_in_bio(struct bio *bio, struct inode *inode, | |
326 | u64 disk_bytenr, u64 *file_offset_ret) | |
327 | { | |
328 | struct bvec_iter iter; | |
329 | struct bio_vec bvec; | |
330 | u64 cur = bio->bi_iter.bi_sector << SECTOR_SHIFT; | |
331 | int ret = 0; | |
332 | ||
333 | bio_for_each_segment(bvec, bio, iter) { | |
334 | struct page *page = bvec.bv_page; | |
335 | ||
336 | if (cur > disk_bytenr) | |
337 | break; | |
338 | if (cur + bvec.bv_len <= disk_bytenr) { | |
339 | cur += bvec.bv_len; | |
340 | continue; | |
341 | } | |
342 | ASSERT(in_range(disk_bytenr, cur, bvec.bv_len)); | |
343 | if (page->mapping && page->mapping->host && | |
344 | page->mapping->host == inode) { | |
345 | ret = 1; | |
346 | *file_offset_ret = page_offset(page) + bvec.bv_offset + | |
347 | disk_bytenr - cur; | |
348 | break; | |
349 | } | |
350 | } | |
351 | return ret; | |
352 | } | |
353 | ||
e62958fc | 354 | /** |
6275193e | 355 | * Lookup the checksum for the read bio in csum tree. |
9e46458a | 356 | * |
e62958fc | 357 | * @inode: inode that the bio is for. |
fb30f470 | 358 | * @bio: bio to look up. |
fb30f470 OS |
359 | * @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return |
360 | * checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If | |
361 | * NULL, the checksum buffer is allocated and returned in | |
362 | * btrfs_io_bio(bio)->csum instead. | |
e62958fc OS |
363 | * |
364 | * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise. | |
365 | */ | |
6275193e | 366 | blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst) |
61b49440 | 367 | { |
0b246afa | 368 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
facc8a22 MX |
369 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
370 | struct btrfs_path *path; | |
6275193e QW |
371 | const u32 sectorsize = fs_info->sectorsize; |
372 | const u32 csum_size = fs_info->csum_size; | |
373 | u32 orig_len = bio->bi_iter.bi_size; | |
374 | u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT; | |
375 | u64 cur_disk_bytenr; | |
facc8a22 | 376 | u8 *csum; |
6275193e | 377 | const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits; |
17347cec | 378 | int count = 0; |
61b49440 | 379 | |
42437a63 | 380 | if (!fs_info->csum_root || (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) |
334c16d8 JB |
381 | return BLK_STS_OK; |
382 | ||
9e46458a QW |
383 | /* |
384 | * This function is only called for read bio. | |
385 | * | |
386 | * This means two things: | |
387 | * - All our csums should only be in csum tree | |
388 | * No ordered extents csums, as ordered extents are only for write | |
389 | * path. | |
6275193e QW |
390 | * - No need to bother any other info from bvec |
391 | * Since we're looking up csums, the only important info is the | |
392 | * disk_bytenr and the length, which can be extracted from bi_iter | |
393 | * directly. | |
9e46458a QW |
394 | */ |
395 | ASSERT(bio_op(bio) == REQ_OP_READ); | |
61b49440 | 396 | path = btrfs_alloc_path(); |
c2db1073 | 397 | if (!path) |
4e4cbee9 | 398 | return BLK_STS_RESOURCE; |
facc8a22 | 399 | |
facc8a22 | 400 | if (!dst) { |
fb30f470 OS |
401 | struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio); |
402 | ||
facc8a22 | 403 | if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { |
31fecccb DS |
404 | btrfs_bio->csum = kmalloc_array(nblocks, csum_size, |
405 | GFP_NOFS); | |
406 | if (!btrfs_bio->csum) { | |
facc8a22 | 407 | btrfs_free_path(path); |
4e4cbee9 | 408 | return BLK_STS_RESOURCE; |
facc8a22 | 409 | } |
facc8a22 MX |
410 | } else { |
411 | btrfs_bio->csum = btrfs_bio->csum_inline; | |
412 | } | |
413 | csum = btrfs_bio->csum; | |
414 | } else { | |
10fe6ca8 | 415 | csum = dst; |
facc8a22 MX |
416 | } |
417 | ||
35478d05 QW |
418 | /* |
419 | * If requested number of sectors is larger than one leaf can contain, | |
420 | * kick the readahead for csum tree. | |
421 | */ | |
422 | if (nblocks > fs_info->csums_per_leaf) | |
e4058b54 | 423 | path->reada = READA_FORWARD; |
61b49440 | 424 | |
2cf8572d CM |
425 | /* |
426 | * the free space stuff is only read when it hasn't been | |
427 | * updated in the current transaction. So, we can safely | |
428 | * read from the commit root and sidestep a nasty deadlock | |
429 | * between reading the free space cache and updating the csum tree. | |
430 | */ | |
70ddc553 | 431 | if (btrfs_is_free_space_inode(BTRFS_I(inode))) { |
2cf8572d | 432 | path->search_commit_root = 1; |
ddf23b3f JB |
433 | path->skip_locking = 1; |
434 | } | |
2cf8572d | 435 | |
6275193e QW |
436 | for (cur_disk_bytenr = orig_disk_bytenr; |
437 | cur_disk_bytenr < orig_disk_bytenr + orig_len; | |
438 | cur_disk_bytenr += (count * sectorsize)) { | |
439 | u64 search_len = orig_disk_bytenr + orig_len - cur_disk_bytenr; | |
440 | unsigned int sector_offset; | |
441 | u8 *csum_dst; | |
c40a3d38 | 442 | |
61b49440 | 443 | /* |
6275193e QW |
444 | * Although both cur_disk_bytenr and orig_disk_bytenr is u64, |
445 | * we're calculating the offset to the bio start. | |
446 | * | |
447 | * Bio size is limited to UINT_MAX, thus unsigned int is large | |
448 | * enough to contain the raw result, not to mention the right | |
449 | * shifted result. | |
61b49440 | 450 | */ |
6275193e QW |
451 | ASSERT(cur_disk_bytenr - orig_disk_bytenr < UINT_MAX); |
452 | sector_offset = (cur_disk_bytenr - orig_disk_bytenr) >> | |
453 | fs_info->sectorsize_bits; | |
454 | csum_dst = csum + sector_offset * csum_size; | |
455 | ||
456 | count = search_csum_tree(fs_info, path, cur_disk_bytenr, | |
457 | search_len, csum_dst); | |
458 | if (count <= 0) { | |
459 | /* | |
460 | * Either we hit a critical error or we didn't find | |
461 | * the csum. | |
462 | * Either way, we put zero into the csums dst, and skip | |
463 | * to the next sector. | |
464 | */ | |
465 | memset(csum_dst, 0, csum_size); | |
466 | count = 1; | |
467 | ||
468 | /* | |
469 | * For data reloc inode, we need to mark the range | |
470 | * NODATASUM so that balance won't report false csum | |
471 | * error. | |
472 | */ | |
473 | if (BTRFS_I(inode)->root->root_key.objectid == | |
474 | BTRFS_DATA_RELOC_TREE_OBJECTID) { | |
475 | u64 file_offset; | |
476 | int ret; | |
477 | ||
478 | ret = search_file_offset_in_bio(bio, inode, | |
479 | cur_disk_bytenr, &file_offset); | |
480 | if (ret) | |
481 | set_extent_bits(io_tree, file_offset, | |
482 | file_offset + sectorsize - 1, | |
483 | EXTENT_NODATASUM); | |
484 | } else { | |
485 | btrfs_warn_rl(fs_info, | |
486 | "csum hole found for disk bytenr range [%llu, %llu)", | |
487 | cur_disk_bytenr, cur_disk_bytenr + sectorsize); | |
488 | } | |
e4100d98 | 489 | } |
61b49440 | 490 | } |
389f239c | 491 | |
61b49440 | 492 | btrfs_free_path(path); |
e62958fc | 493 | return BLK_STS_OK; |
4b46fce2 JB |
494 | } |
495 | ||
17d217fe | 496 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
a2de733c | 497 | struct list_head *list, int search_commit) |
17d217fe | 498 | { |
0b246afa | 499 | struct btrfs_fs_info *fs_info = root->fs_info; |
17d217fe YZ |
500 | struct btrfs_key key; |
501 | struct btrfs_path *path; | |
502 | struct extent_buffer *leaf; | |
503 | struct btrfs_ordered_sum *sums; | |
17d217fe | 504 | struct btrfs_csum_item *item; |
0678b618 | 505 | LIST_HEAD(tmplist); |
17d217fe YZ |
506 | unsigned long offset; |
507 | int ret; | |
508 | size_t size; | |
509 | u64 csum_end; | |
223486c2 | 510 | const u32 csum_size = fs_info->csum_size; |
17d217fe | 511 | |
0b246afa JM |
512 | ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && |
513 | IS_ALIGNED(end + 1, fs_info->sectorsize)); | |
4277a9c3 | 514 | |
17d217fe | 515 | path = btrfs_alloc_path(); |
d8926bb3 MF |
516 | if (!path) |
517 | return -ENOMEM; | |
17d217fe | 518 | |
a2de733c AJ |
519 | if (search_commit) { |
520 | path->skip_locking = 1; | |
e4058b54 | 521 | path->reada = READA_FORWARD; |
a2de733c AJ |
522 | path->search_commit_root = 1; |
523 | } | |
524 | ||
17d217fe YZ |
525 | key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
526 | key.offset = start; | |
527 | key.type = BTRFS_EXTENT_CSUM_KEY; | |
528 | ||
07d400a6 | 529 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
17d217fe YZ |
530 | if (ret < 0) |
531 | goto fail; | |
532 | if (ret > 0 && path->slots[0] > 0) { | |
533 | leaf = path->nodes[0]; | |
534 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); | |
535 | if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID && | |
536 | key.type == BTRFS_EXTENT_CSUM_KEY) { | |
265fdfa6 | 537 | offset = (start - key.offset) >> fs_info->sectorsize_bits; |
17d217fe YZ |
538 | if (offset * csum_size < |
539 | btrfs_item_size_nr(leaf, path->slots[0] - 1)) | |
540 | path->slots[0]--; | |
541 | } | |
542 | } | |
543 | ||
544 | while (start <= end) { | |
545 | leaf = path->nodes[0]; | |
546 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { | |
07d400a6 | 547 | ret = btrfs_next_leaf(root, path); |
17d217fe YZ |
548 | if (ret < 0) |
549 | goto fail; | |
550 | if (ret > 0) | |
551 | break; | |
552 | leaf = path->nodes[0]; | |
553 | } | |
554 | ||
555 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | |
556 | if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || | |
628c8282 ZYW |
557 | key.type != BTRFS_EXTENT_CSUM_KEY || |
558 | key.offset > end) | |
17d217fe YZ |
559 | break; |
560 | ||
561 | if (key.offset > start) | |
562 | start = key.offset; | |
563 | ||
564 | size = btrfs_item_size_nr(leaf, path->slots[0]); | |
0b246afa | 565 | csum_end = key.offset + (size / csum_size) * fs_info->sectorsize; |
87b29b20 YZ |
566 | if (csum_end <= start) { |
567 | path->slots[0]++; | |
568 | continue; | |
569 | } | |
17d217fe | 570 | |
07d400a6 | 571 | csum_end = min(csum_end, end + 1); |
17d217fe YZ |
572 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
573 | struct btrfs_csum_item); | |
07d400a6 YZ |
574 | while (start < csum_end) { |
575 | size = min_t(size_t, csum_end - start, | |
1e25a2e3 | 576 | max_ordered_sum_bytes(fs_info, csum_size)); |
0b246afa | 577 | sums = kzalloc(btrfs_ordered_sum_size(fs_info, size), |
f51a4a18 | 578 | GFP_NOFS); |
0678b618 MF |
579 | if (!sums) { |
580 | ret = -ENOMEM; | |
581 | goto fail; | |
582 | } | |
17d217fe | 583 | |
07d400a6 | 584 | sums->bytenr = start; |
f51a4a18 | 585 | sums->len = (int)size; |
07d400a6 | 586 | |
265fdfa6 | 587 | offset = (start - key.offset) >> fs_info->sectorsize_bits; |
07d400a6 | 588 | offset *= csum_size; |
265fdfa6 | 589 | size >>= fs_info->sectorsize_bits; |
07d400a6 | 590 | |
f51a4a18 MX |
591 | read_extent_buffer(path->nodes[0], |
592 | sums->sums, | |
593 | ((unsigned long)item) + offset, | |
594 | csum_size * size); | |
595 | ||
0b246afa | 596 | start += fs_info->sectorsize * size; |
0678b618 | 597 | list_add_tail(&sums->list, &tmplist); |
07d400a6 | 598 | } |
17d217fe YZ |
599 | path->slots[0]++; |
600 | } | |
601 | ret = 0; | |
602 | fail: | |
0678b618 | 603 | while (ret < 0 && !list_empty(&tmplist)) { |
6e5aafb2 | 604 | sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list); |
0678b618 MF |
605 | list_del(&sums->list); |
606 | kfree(sums); | |
607 | } | |
608 | list_splice_tail(&tmplist, list); | |
609 | ||
17d217fe YZ |
610 | btrfs_free_path(path); |
611 | return ret; | |
612 | } | |
613 | ||
51d470ae NB |
614 | /* |
615 | * btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio | |
616 | * @inode: Owner of the data inside the bio | |
617 | * @bio: Contains the data to be checksummed | |
618 | * @file_start: offset in file this bio begins to describe | |
619 | * @contig: Boolean. If true/1 means all bio vecs in this bio are | |
620 | * contiguous and they begin at @file_start in the file. False/0 | |
621 | * means this bio can contains potentially discontigous bio vecs | |
622 | * so the logical offset of each should be calculated separately. | |
623 | */ | |
bd242a08 | 624 | blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio, |
2ff7e61e | 625 | u64 file_start, int contig) |
e015640f | 626 | { |
c3504372 | 627 | struct btrfs_fs_info *fs_info = inode->root->fs_info; |
d5178578 | 628 | SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); |
e6dcd2dc | 629 | struct btrfs_ordered_sum *sums; |
6cd7ce49 | 630 | struct btrfs_ordered_extent *ordered = NULL; |
e015640f | 631 | char *data; |
17347cec LB |
632 | struct bvec_iter iter; |
633 | struct bio_vec bvec; | |
f51a4a18 | 634 | int index; |
c40a3d38 | 635 | int nr_sectors; |
3edf7d33 CM |
636 | unsigned long total_bytes = 0; |
637 | unsigned long this_sum_bytes = 0; | |
17347cec | 638 | int i; |
3edf7d33 | 639 | u64 offset; |
a3d46aea NB |
640 | unsigned nofs_flag; |
641 | ||
642 | nofs_flag = memalloc_nofs_save(); | |
643 | sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), | |
644 | GFP_KERNEL); | |
645 | memalloc_nofs_restore(nofs_flag); | |
e015640f | 646 | |
e015640f | 647 | if (!sums) |
4e4cbee9 | 648 | return BLK_STS_RESOURCE; |
3edf7d33 | 649 | |
4f024f37 | 650 | sums->len = bio->bi_iter.bi_size; |
e6dcd2dc | 651 | INIT_LIST_HEAD(&sums->list); |
d20f7043 CM |
652 | |
653 | if (contig) | |
654 | offset = file_start; | |
655 | else | |
6cd7ce49 | 656 | offset = 0; /* shut up gcc */ |
d20f7043 | 657 | |
1201b58b | 658 | sums->bytenr = bio->bi_iter.bi_sector << 9; |
f51a4a18 | 659 | index = 0; |
e015640f | 660 | |
d5178578 JT |
661 | shash->tfm = fs_info->csum_shash; |
662 | ||
17347cec | 663 | bio_for_each_segment(bvec, bio, iter) { |
d20f7043 | 664 | if (!contig) |
17347cec | 665 | offset = page_offset(bvec.bv_page) + bvec.bv_offset; |
d20f7043 | 666 | |
6cd7ce49 CH |
667 | if (!ordered) { |
668 | ordered = btrfs_lookup_ordered_extent(inode, offset); | |
669 | BUG_ON(!ordered); /* Logic error */ | |
670 | } | |
671 | ||
0b246afa | 672 | nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, |
17347cec | 673 | bvec.bv_len + fs_info->sectorsize |
0b246afa | 674 | - 1); |
c40a3d38 CR |
675 | |
676 | for (i = 0; i < nr_sectors; i++) { | |
bffe633e OS |
677 | if (offset >= ordered->file_offset + ordered->num_bytes || |
678 | offset < ordered->file_offset) { | |
c40a3d38 CR |
679 | unsigned long bytes_left; |
680 | ||
c40a3d38 CR |
681 | sums->len = this_sum_bytes; |
682 | this_sum_bytes = 0; | |
f9756261 | 683 | btrfs_add_ordered_sum(ordered, sums); |
c40a3d38 CR |
684 | btrfs_put_ordered_extent(ordered); |
685 | ||
686 | bytes_left = bio->bi_iter.bi_size - total_bytes; | |
687 | ||
a3d46aea NB |
688 | nofs_flag = memalloc_nofs_save(); |
689 | sums = kvzalloc(btrfs_ordered_sum_size(fs_info, | |
690 | bytes_left), GFP_KERNEL); | |
691 | memalloc_nofs_restore(nofs_flag); | |
c40a3d38 CR |
692 | BUG_ON(!sums); /* -ENOMEM */ |
693 | sums->len = bytes_left; | |
694 | ordered = btrfs_lookup_ordered_extent(inode, | |
695 | offset); | |
696 | ASSERT(ordered); /* Logic error */ | |
1201b58b | 697 | sums->bytenr = (bio->bi_iter.bi_sector << 9) |
c40a3d38 CR |
698 | + total_bytes; |
699 | index = 0; | |
c40a3d38 | 700 | } |
3edf7d33 | 701 | |
443c8e2a | 702 | data = kmap_atomic(bvec.bv_page); |
fd08001f | 703 | crypto_shash_digest(shash, data + bvec.bv_offset |
d5178578 | 704 | + (i * fs_info->sectorsize), |
fd08001f EB |
705 | fs_info->sectorsize, |
706 | sums->sums + index); | |
443c8e2a | 707 | kunmap_atomic(data); |
713cebfb | 708 | index += fs_info->csum_size; |
0b246afa JM |
709 | offset += fs_info->sectorsize; |
710 | this_sum_bytes += fs_info->sectorsize; | |
711 | total_bytes += fs_info->sectorsize; | |
3edf7d33 CM |
712 | } |
713 | ||
e015640f | 714 | } |
ed98b56a | 715 | this_sum_bytes = 0; |
f9756261 | 716 | btrfs_add_ordered_sum(ordered, sums); |
3edf7d33 | 717 | btrfs_put_ordered_extent(ordered); |
e015640f CM |
718 | return 0; |
719 | } | |
720 | ||
459931ec CM |
721 | /* |
722 | * helper function for csum removal, this expects the | |
723 | * key to describe the csum pointed to by the path, and it expects | |
724 | * the csum to overlap the range [bytenr, len] | |
725 | * | |
726 | * The csum should not be entirely contained in the range and the | |
727 | * range should not be entirely contained in the csum. | |
728 | * | |
729 | * This calls btrfs_truncate_item with the correct args based on the | |
730 | * overlap, and fixes up the key as required. | |
731 | */ | |
2ff7e61e | 732 | static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info, |
143bede5 JM |
733 | struct btrfs_path *path, |
734 | struct btrfs_key *key, | |
735 | u64 bytenr, u64 len) | |
459931ec CM |
736 | { |
737 | struct extent_buffer *leaf; | |
223486c2 | 738 | const u32 csum_size = fs_info->csum_size; |
459931ec CM |
739 | u64 csum_end; |
740 | u64 end_byte = bytenr + len; | |
265fdfa6 | 741 | u32 blocksize_bits = fs_info->sectorsize_bits; |
459931ec CM |
742 | |
743 | leaf = path->nodes[0]; | |
744 | csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; | |
265fdfa6 | 745 | csum_end <<= blocksize_bits; |
459931ec CM |
746 | csum_end += key->offset; |
747 | ||
748 | if (key->offset < bytenr && csum_end <= end_byte) { | |
749 | /* | |
750 | * [ bytenr - len ] | |
751 | * [ ] | |
752 | * [csum ] | |
753 | * A simple truncate off the end of the item | |
754 | */ | |
755 | u32 new_size = (bytenr - key->offset) >> blocksize_bits; | |
756 | new_size *= csum_size; | |
78ac4f9e | 757 | btrfs_truncate_item(path, new_size, 1); |
459931ec CM |
758 | } else if (key->offset >= bytenr && csum_end > end_byte && |
759 | end_byte > key->offset) { | |
760 | /* | |
761 | * [ bytenr - len ] | |
762 | * [ ] | |
763 | * [csum ] | |
764 | * we need to truncate from the beginning of the csum | |
765 | */ | |
766 | u32 new_size = (csum_end - end_byte) >> blocksize_bits; | |
767 | new_size *= csum_size; | |
768 | ||
78ac4f9e | 769 | btrfs_truncate_item(path, new_size, 0); |
459931ec CM |
770 | |
771 | key->offset = end_byte; | |
0b246afa | 772 | btrfs_set_item_key_safe(fs_info, path, key); |
459931ec CM |
773 | } else { |
774 | BUG(); | |
775 | } | |
459931ec CM |
776 | } |
777 | ||
778 | /* | |
779 | * deletes the csum items from the csum tree for a given | |
780 | * range of bytes. | |
781 | */ | |
782 | int btrfs_del_csums(struct btrfs_trans_handle *trans, | |
40e046ac | 783 | struct btrfs_root *root, u64 bytenr, u64 len) |
459931ec | 784 | { |
40e046ac | 785 | struct btrfs_fs_info *fs_info = trans->fs_info; |
459931ec CM |
786 | struct btrfs_path *path; |
787 | struct btrfs_key key; | |
788 | u64 end_byte = bytenr + len; | |
789 | u64 csum_end; | |
790 | struct extent_buffer *leaf; | |
b86652be | 791 | int ret = 0; |
223486c2 | 792 | const u32 csum_size = fs_info->csum_size; |
265fdfa6 | 793 | u32 blocksize_bits = fs_info->sectorsize_bits; |
459931ec | 794 | |
40e046ac FM |
795 | ASSERT(root == fs_info->csum_root || |
796 | root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); | |
797 | ||
459931ec | 798 | path = btrfs_alloc_path(); |
2a29edc6 | 799 | if (!path) |
800 | return -ENOMEM; | |
459931ec | 801 | |
d397712b | 802 | while (1) { |
459931ec CM |
803 | key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
804 | key.offset = end_byte - 1; | |
805 | key.type = BTRFS_EXTENT_CSUM_KEY; | |
806 | ||
807 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | |
808 | if (ret > 0) { | |
b86652be | 809 | ret = 0; |
459931ec | 810 | if (path->slots[0] == 0) |
65a246c5 | 811 | break; |
459931ec | 812 | path->slots[0]--; |
ad0397a7 | 813 | } else if (ret < 0) { |
65a246c5 | 814 | break; |
459931ec | 815 | } |
ad0397a7 | 816 | |
459931ec CM |
817 | leaf = path->nodes[0]; |
818 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | |
819 | ||
820 | if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || | |
821 | key.type != BTRFS_EXTENT_CSUM_KEY) { | |
822 | break; | |
823 | } | |
824 | ||
825 | if (key.offset >= end_byte) | |
826 | break; | |
827 | ||
828 | csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; | |
829 | csum_end <<= blocksize_bits; | |
830 | csum_end += key.offset; | |
831 | ||
832 | /* this csum ends before we start, we're done */ | |
833 | if (csum_end <= bytenr) | |
834 | break; | |
835 | ||
836 | /* delete the entire item, it is inside our range */ | |
837 | if (key.offset >= bytenr && csum_end <= end_byte) { | |
6f546216 FM |
838 | int del_nr = 1; |
839 | ||
840 | /* | |
841 | * Check how many csum items preceding this one in this | |
842 | * leaf correspond to our range and then delete them all | |
843 | * at once. | |
844 | */ | |
845 | if (key.offset > bytenr && path->slots[0] > 0) { | |
846 | int slot = path->slots[0] - 1; | |
847 | ||
848 | while (slot >= 0) { | |
849 | struct btrfs_key pk; | |
850 | ||
851 | btrfs_item_key_to_cpu(leaf, &pk, slot); | |
852 | if (pk.offset < bytenr || | |
853 | pk.type != BTRFS_EXTENT_CSUM_KEY || | |
854 | pk.objectid != | |
855 | BTRFS_EXTENT_CSUM_OBJECTID) | |
856 | break; | |
857 | path->slots[0] = slot; | |
858 | del_nr++; | |
859 | key.offset = pk.offset; | |
860 | slot--; | |
861 | } | |
862 | } | |
863 | ret = btrfs_del_items(trans, root, path, | |
864 | path->slots[0], del_nr); | |
65a246c5 | 865 | if (ret) |
b86652be | 866 | break; |
dcbdd4dc CM |
867 | if (key.offset == bytenr) |
868 | break; | |
459931ec CM |
869 | } else if (key.offset < bytenr && csum_end > end_byte) { |
870 | unsigned long offset; | |
871 | unsigned long shift_len; | |
872 | unsigned long item_offset; | |
873 | /* | |
874 | * [ bytenr - len ] | |
875 | * [csum ] | |
876 | * | |
877 | * Our bytes are in the middle of the csum, | |
878 | * we need to split this item and insert a new one. | |
879 | * | |
880 | * But we can't drop the path because the | |
881 | * csum could change, get removed, extended etc. | |
882 | * | |
883 | * The trick here is the max size of a csum item leaves | |
884 | * enough room in the tree block for a single | |
885 | * item header. So, we split the item in place, | |
886 | * adding a new header pointing to the existing | |
887 | * bytes. Then we loop around again and we have | |
888 | * a nicely formed csum item that we can neatly | |
889 | * truncate. | |
890 | */ | |
891 | offset = (bytenr - key.offset) >> blocksize_bits; | |
892 | offset *= csum_size; | |
893 | ||
894 | shift_len = (len >> blocksize_bits) * csum_size; | |
895 | ||
896 | item_offset = btrfs_item_ptr_offset(leaf, | |
897 | path->slots[0]); | |
898 | ||
b159fa28 | 899 | memzero_extent_buffer(leaf, item_offset + offset, |
459931ec CM |
900 | shift_len); |
901 | key.offset = bytenr; | |
902 | ||
903 | /* | |
904 | * btrfs_split_item returns -EAGAIN when the | |
905 | * item changed size or key | |
906 | */ | |
907 | ret = btrfs_split_item(trans, root, path, &key, offset); | |
79787eaa | 908 | if (ret && ret != -EAGAIN) { |
66642832 | 909 | btrfs_abort_transaction(trans, ret); |
b86652be | 910 | break; |
79787eaa | 911 | } |
b86652be | 912 | ret = 0; |
459931ec CM |
913 | |
914 | key.offset = end_byte - 1; | |
915 | } else { | |
2ff7e61e | 916 | truncate_one_csum(fs_info, path, &key, bytenr, len); |
dcbdd4dc CM |
917 | if (key.offset < bytenr) |
918 | break; | |
459931ec | 919 | } |
b3b4aa74 | 920 | btrfs_release_path(path); |
459931ec | 921 | } |
459931ec | 922 | btrfs_free_path(path); |
65a246c5 | 923 | return ret; |
459931ec CM |
924 | } |
925 | ||
ea7036de FM |
926 | static int find_next_csum_offset(struct btrfs_root *root, |
927 | struct btrfs_path *path, | |
928 | u64 *next_offset) | |
929 | { | |
930 | const u32 nritems = btrfs_header_nritems(path->nodes[0]); | |
931 | struct btrfs_key found_key; | |
932 | int slot = path->slots[0] + 1; | |
933 | int ret; | |
934 | ||
935 | if (nritems == 0 || slot >= nritems) { | |
936 | ret = btrfs_next_leaf(root, path); | |
937 | if (ret < 0) { | |
938 | return ret; | |
939 | } else if (ret > 0) { | |
940 | *next_offset = (u64)-1; | |
941 | return 0; | |
942 | } | |
943 | slot = path->slots[0]; | |
944 | } | |
945 | ||
946 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); | |
947 | ||
948 | if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || | |
949 | found_key.type != BTRFS_EXTENT_CSUM_KEY) | |
950 | *next_offset = (u64)-1; | |
951 | else | |
952 | *next_offset = found_key.offset; | |
953 | ||
954 | return 0; | |
955 | } | |
956 | ||
065631f6 | 957 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, |
d20f7043 | 958 | struct btrfs_root *root, |
e6dcd2dc | 959 | struct btrfs_ordered_sum *sums) |
f254e52c | 960 | { |
0b246afa | 961 | struct btrfs_fs_info *fs_info = root->fs_info; |
f254e52c | 962 | struct btrfs_key file_key; |
6567e837 | 963 | struct btrfs_key found_key; |
5caf2a00 | 964 | struct btrfs_path *path; |
f254e52c | 965 | struct btrfs_csum_item *item; |
065631f6 | 966 | struct btrfs_csum_item *item_end; |
ff79f819 | 967 | struct extent_buffer *leaf = NULL; |
f51a4a18 MX |
968 | u64 next_offset; |
969 | u64 total_bytes = 0; | |
6567e837 | 970 | u64 csum_offset; |
f51a4a18 | 971 | u64 bytenr; |
f578d4bd | 972 | u32 ins_size; |
f51a4a18 MX |
973 | int index = 0; |
974 | int found_next; | |
975 | int ret; | |
223486c2 | 976 | const u32 csum_size = fs_info->csum_size; |
6e92f5e6 | 977 | |
5caf2a00 | 978 | path = btrfs_alloc_path(); |
d8926bb3 MF |
979 | if (!path) |
980 | return -ENOMEM; | |
065631f6 CM |
981 | again: |
982 | next_offset = (u64)-1; | |
983 | found_next = 0; | |
f51a4a18 | 984 | bytenr = sums->bytenr + total_bytes; |
d20f7043 | 985 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
f51a4a18 | 986 | file_key.offset = bytenr; |
962a298f | 987 | file_key.type = BTRFS_EXTENT_CSUM_KEY; |
a429e513 | 988 | |
f51a4a18 | 989 | item = btrfs_lookup_csum(trans, root, path, bytenr, 1); |
ff79f819 | 990 | if (!IS_ERR(item)) { |
639cb586 | 991 | ret = 0; |
f51a4a18 MX |
992 | leaf = path->nodes[0]; |
993 | item_end = btrfs_item_ptr(leaf, path->slots[0], | |
994 | struct btrfs_csum_item); | |
995 | item_end = (struct btrfs_csum_item *)((char *)item_end + | |
996 | btrfs_item_size_nr(leaf, path->slots[0])); | |
a429e513 | 997 | goto found; |
ff79f819 | 998 | } |
a429e513 | 999 | ret = PTR_ERR(item); |
4a500fd1 | 1000 | if (ret != -EFBIG && ret != -ENOENT) |
918cdf44 | 1001 | goto out; |
4a500fd1 | 1002 | |
a429e513 CM |
1003 | if (ret == -EFBIG) { |
1004 | u32 item_size; | |
1005 | /* we found one, but it isn't big enough yet */ | |
5f39d397 CM |
1006 | leaf = path->nodes[0]; |
1007 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | |
607d432d | 1008 | if ((item_size / csum_size) >= |
0b246afa | 1009 | MAX_CSUM_ITEMS(fs_info, csum_size)) { |
a429e513 CM |
1010 | /* already at max size, make a new one */ |
1011 | goto insert; | |
1012 | } | |
1013 | } else { | |
ea7036de FM |
1014 | /* We didn't find a csum item, insert one. */ |
1015 | ret = find_next_csum_offset(root, path, &next_offset); | |
1016 | if (ret < 0) | |
1017 | goto out; | |
f578d4bd | 1018 | found_next = 1; |
a429e513 CM |
1019 | goto insert; |
1020 | } | |
1021 | ||
1022 | /* | |
cc14600c FM |
1023 | * At this point, we know the tree has a checksum item that ends at an |
1024 | * offset matching the start of the checksum range we want to insert. | |
1025 | * We try to extend that item as much as possible and then add as many | |
1026 | * checksums to it as they fit. | |
1027 | * | |
1028 | * First check if the leaf has enough free space for at least one | |
1029 | * checksum. If it has go directly to the item extension code, otherwise | |
1030 | * release the path and do a search for insertion before the extension. | |
a429e513 | 1031 | */ |
cc14600c FM |
1032 | if (btrfs_leaf_free_space(leaf) >= csum_size) { |
1033 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | |
1034 | csum_offset = (bytenr - found_key.offset) >> | |
265fdfa6 | 1035 | fs_info->sectorsize_bits; |
cc14600c FM |
1036 | goto extend_csum; |
1037 | } | |
1038 | ||
b3b4aa74 | 1039 | btrfs_release_path(path); |
9a664971 | 1040 | path->search_for_extension = 1; |
6567e837 | 1041 | ret = btrfs_search_slot(trans, root, &file_key, path, |
607d432d | 1042 | csum_size, 1); |
9a664971 | 1043 | path->search_for_extension = 0; |
6567e837 | 1044 | if (ret < 0) |
918cdf44 | 1045 | goto out; |
459931ec CM |
1046 | |
1047 | if (ret > 0) { | |
1048 | if (path->slots[0] == 0) | |
1049 | goto insert; | |
1050 | path->slots[0]--; | |
6567e837 | 1051 | } |
459931ec | 1052 | |
5f39d397 CM |
1053 | leaf = path->nodes[0]; |
1054 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | |
265fdfa6 | 1055 | csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits; |
459931ec | 1056 | |
962a298f | 1057 | if (found_key.type != BTRFS_EXTENT_CSUM_KEY || |
d20f7043 | 1058 | found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || |
0b246afa | 1059 | csum_offset >= MAX_CSUM_ITEMS(fs_info, csum_size)) { |
6567e837 CM |
1060 | goto insert; |
1061 | } | |
459931ec | 1062 | |
cc14600c | 1063 | extend_csum: |
2f697dc6 | 1064 | if (csum_offset == btrfs_item_size_nr(leaf, path->slots[0]) / |
607d432d | 1065 | csum_size) { |
2f697dc6 LB |
1066 | int extend_nr; |
1067 | u64 tmp; | |
1068 | u32 diff; | |
2f697dc6 | 1069 | |
f51a4a18 | 1070 | tmp = sums->len - total_bytes; |
265fdfa6 | 1071 | tmp >>= fs_info->sectorsize_bits; |
2f697dc6 | 1072 | WARN_ON(tmp < 1); |
ea7036de FM |
1073 | extend_nr = max_t(int, 1, tmp); |
1074 | ||
1075 | /* | |
1076 | * A log tree can already have checksum items with a subset of | |
1077 | * the checksums we are trying to log. This can happen after | |
1078 | * doing a sequence of partial writes into prealloc extents and | |
1079 | * fsyncs in between, with a full fsync logging a larger subrange | |
1080 | * of an extent for which a previous fast fsync logged a smaller | |
1081 | * subrange. And this happens in particular due to merging file | |
1082 | * extent items when we complete an ordered extent for a range | |
1083 | * covered by a prealloc extent - this is done at | |
1084 | * btrfs_mark_extent_written(). | |
1085 | * | |
1086 | * So if we try to extend the previous checksum item, which has | |
1087 | * a range that ends at the start of the range we want to insert, | |
1088 | * make sure we don't extend beyond the start offset of the next | |
1089 | * checksum item. If we are at the last item in the leaf, then | |
1090 | * forget the optimization of extending and add a new checksum | |
1091 | * item - it is not worth the complexity of releasing the path, | |
1092 | * getting the first key for the next leaf, repeat the btree | |
1093 | * search, etc, because log trees are temporary anyway and it | |
1094 | * would only save a few bytes of leaf space. | |
1095 | */ | |
1096 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | |
1097 | if (path->slots[0] + 1 >= | |
1098 | btrfs_header_nritems(path->nodes[0])) { | |
1099 | ret = find_next_csum_offset(root, path, &next_offset); | |
1100 | if (ret < 0) | |
1101 | goto out; | |
1102 | found_next = 1; | |
1103 | goto insert; | |
1104 | } | |
1105 | ||
1106 | ret = find_next_csum_offset(root, path, &next_offset); | |
1107 | if (ret < 0) | |
1108 | goto out; | |
1109 | ||
1110 | tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits; | |
1111 | if (tmp <= INT_MAX) | |
1112 | extend_nr = min_t(int, extend_nr, tmp); | |
1113 | } | |
2f697dc6 | 1114 | |
2f697dc6 | 1115 | diff = (csum_offset + extend_nr) * csum_size; |
0b246afa JM |
1116 | diff = min(diff, |
1117 | MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size); | |
459931ec | 1118 | |
5f39d397 | 1119 | diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); |
cc14600c | 1120 | diff = min_t(u32, btrfs_leaf_free_space(leaf), diff); |
2f697dc6 LB |
1121 | diff /= csum_size; |
1122 | diff *= csum_size; | |
459931ec | 1123 | |
c71dd880 | 1124 | btrfs_extend_item(path, diff); |
f51a4a18 | 1125 | ret = 0; |
6567e837 CM |
1126 | goto csum; |
1127 | } | |
1128 | ||
1129 | insert: | |
b3b4aa74 | 1130 | btrfs_release_path(path); |
6567e837 | 1131 | csum_offset = 0; |
f578d4bd | 1132 | if (found_next) { |
2f697dc6 | 1133 | u64 tmp; |
d20f7043 | 1134 | |
f51a4a18 | 1135 | tmp = sums->len - total_bytes; |
265fdfa6 | 1136 | tmp >>= fs_info->sectorsize_bits; |
2f697dc6 | 1137 | tmp = min(tmp, (next_offset - file_key.offset) >> |
265fdfa6 | 1138 | fs_info->sectorsize_bits); |
2f697dc6 | 1139 | |
50d0446e SK |
1140 | tmp = max_t(u64, 1, tmp); |
1141 | tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size)); | |
607d432d | 1142 | ins_size = csum_size * tmp; |
f578d4bd | 1143 | } else { |
607d432d | 1144 | ins_size = csum_size; |
f578d4bd | 1145 | } |
5caf2a00 | 1146 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, |
f578d4bd | 1147 | ins_size); |
54aa1f4d | 1148 | if (ret < 0) |
918cdf44 | 1149 | goto out; |
fae7f21c | 1150 | if (WARN_ON(ret != 0)) |
918cdf44 | 1151 | goto out; |
5f39d397 | 1152 | leaf = path->nodes[0]; |
f51a4a18 | 1153 | csum: |
5f39d397 | 1154 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); |
f51a4a18 MX |
1155 | item_end = (struct btrfs_csum_item *)((unsigned char *)item + |
1156 | btrfs_item_size_nr(leaf, path->slots[0])); | |
509659cd | 1157 | item = (struct btrfs_csum_item *)((unsigned char *)item + |
607d432d | 1158 | csum_offset * csum_size); |
b18c6685 | 1159 | found: |
265fdfa6 | 1160 | ins_size = (u32)(sums->len - total_bytes) >> fs_info->sectorsize_bits; |
f51a4a18 MX |
1161 | ins_size *= csum_size; |
1162 | ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item, | |
1163 | ins_size); | |
1164 | write_extent_buffer(leaf, sums->sums + index, (unsigned long)item, | |
1165 | ins_size); | |
1166 | ||
1e25a2e3 | 1167 | index += ins_size; |
f51a4a18 | 1168 | ins_size /= csum_size; |
0b246afa | 1169 | total_bytes += ins_size * fs_info->sectorsize; |
a6591715 | 1170 | |
5caf2a00 | 1171 | btrfs_mark_buffer_dirty(path->nodes[0]); |
e6dcd2dc | 1172 | if (total_bytes < sums->len) { |
b3b4aa74 | 1173 | btrfs_release_path(path); |
b9473439 | 1174 | cond_resched(); |
065631f6 CM |
1175 | goto again; |
1176 | } | |
53863232 | 1177 | out: |
5caf2a00 | 1178 | btrfs_free_path(path); |
f254e52c CM |
1179 | return ret; |
1180 | } | |
7ffbb598 | 1181 | |
9cdc5124 | 1182 | void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, |
7ffbb598 FM |
1183 | const struct btrfs_path *path, |
1184 | struct btrfs_file_extent_item *fi, | |
1185 | const bool new_inline, | |
1186 | struct extent_map *em) | |
1187 | { | |
3ffbd68c | 1188 | struct btrfs_fs_info *fs_info = inode->root->fs_info; |
9cdc5124 | 1189 | struct btrfs_root *root = inode->root; |
7ffbb598 FM |
1190 | struct extent_buffer *leaf = path->nodes[0]; |
1191 | const int slot = path->slots[0]; | |
1192 | struct btrfs_key key; | |
1193 | u64 extent_start, extent_end; | |
1194 | u64 bytenr; | |
1195 | u8 type = btrfs_file_extent_type(leaf, fi); | |
1196 | int compress_type = btrfs_file_extent_compression(leaf, fi); | |
1197 | ||
7ffbb598 FM |
1198 | btrfs_item_key_to_cpu(leaf, &key, slot); |
1199 | extent_start = key.offset; | |
a5eeb3d1 | 1200 | extent_end = btrfs_file_extent_end(path); |
7ffbb598 FM |
1201 | em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); |
1202 | if (type == BTRFS_FILE_EXTENT_REG || | |
1203 | type == BTRFS_FILE_EXTENT_PREALLOC) { | |
1204 | em->start = extent_start; | |
1205 | em->len = extent_end - extent_start; | |
1206 | em->orig_start = extent_start - | |
1207 | btrfs_file_extent_offset(leaf, fi); | |
1208 | em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); | |
1209 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | |
1210 | if (bytenr == 0) { | |
1211 | em->block_start = EXTENT_MAP_HOLE; | |
1212 | return; | |
1213 | } | |
1214 | if (compress_type != BTRFS_COMPRESS_NONE) { | |
1215 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | |
1216 | em->compress_type = compress_type; | |
1217 | em->block_start = bytenr; | |
1218 | em->block_len = em->orig_block_len; | |
1219 | } else { | |
1220 | bytenr += btrfs_file_extent_offset(leaf, fi); | |
1221 | em->block_start = bytenr; | |
1222 | em->block_len = em->len; | |
1223 | if (type == BTRFS_FILE_EXTENT_PREALLOC) | |
1224 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | |
1225 | } | |
1226 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { | |
1227 | em->block_start = EXTENT_MAP_INLINE; | |
1228 | em->start = extent_start; | |
1229 | em->len = extent_end - extent_start; | |
1230 | /* | |
1231 | * Initialize orig_start and block_len with the same values | |
1232 | * as in inode.c:btrfs_get_extent(). | |
1233 | */ | |
1234 | em->orig_start = EXTENT_MAP_HOLE; | |
1235 | em->block_len = (u64)-1; | |
1236 | if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) { | |
1237 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | |
1238 | em->compress_type = compress_type; | |
1239 | } | |
1240 | } else { | |
0b246afa | 1241 | btrfs_err(fs_info, |
9cdc5124 NB |
1242 | "unknown file extent item type %d, inode %llu, offset %llu, " |
1243 | "root %llu", type, btrfs_ino(inode), extent_start, | |
7ffbb598 FM |
1244 | root->root_key.objectid); |
1245 | } | |
1246 | } | |
a5eeb3d1 FM |
1247 | |
1248 | /* | |
1249 | * Returns the end offset (non inclusive) of the file extent item the given path | |
1250 | * points to. If it points to an inline extent, the returned offset is rounded | |
1251 | * up to the sector size. | |
1252 | */ | |
1253 | u64 btrfs_file_extent_end(const struct btrfs_path *path) | |
1254 | { | |
1255 | const struct extent_buffer *leaf = path->nodes[0]; | |
1256 | const int slot = path->slots[0]; | |
1257 | struct btrfs_file_extent_item *fi; | |
1258 | struct btrfs_key key; | |
1259 | u64 end; | |
1260 | ||
1261 | btrfs_item_key_to_cpu(leaf, &key, slot); | |
1262 | ASSERT(key.type == BTRFS_EXTENT_DATA_KEY); | |
1263 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | |
1264 | ||
1265 | if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { | |
1266 | end = btrfs_file_extent_ram_bytes(leaf, fi); | |
1267 | end = ALIGN(key.offset + end, leaf->fs_info->sectorsize); | |
1268 | } else { | |
1269 | end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); | |
1270 | } | |
1271 | ||
1272 | return end; | |
1273 | } |