// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include "messages.h"
#include "ctree.h"
#include "subpage.h"
#include "btrfs_inode.h"

/*
 * Subpage (sectorsize < PAGE_SIZE) support overview:
 *
 * Limitations:
 *
 * - Only support 64K page size for now
 *   This is to make metadata handling easier, as a 64K page would ensure
 *   all nodesize would fit inside one page, thus we don't need to handle
 *   cases where a tree block crosses several pages.
 *
 * - Only metadata read-write for now
 *   The data read-write part is in development.
 *
 * - Metadata can't cross 64K page boundary
 *   btrfs-progs and kernel have done that for a while, thus only ancient
 *   filesystems could have such a problem. For such a case, do a graceful
 *   rejection.
 *
 * Special behavior:
 *
 * - Metadata
 *   Metadata read is fully supported.
 *   Meaning reading one tree block will only trigger the read for the
 *   needed range, other unrelated ranges in the same page will not be
 *   touched.
 *
 *   Metadata write support is partial.
 *   The writeback is still for the full page, but we will only submit
 *   the dirty extent buffers in the page.
 *
 *   This means, if we have a metadata page like this:
 *
 *   Page offset
 *     0         16K         32K         48K         64K
 *     |/////////|           |///////////|
 *        \- Tree block A        \- Tree block B
 *
 *   Even if we just want to writeback tree block A, we will also writeback
 *   tree block B if it's also dirty.
 *
 *   This may cause extra metadata writeback, which results in more COW.
 *
 * Implementation:
 *
 * - Common
 *   Both metadata and data will use a new structure, btrfs_subpage, to
 *   record the status of each sector inside a page. This provides the extra
 *   granularity needed.
 *
 * - Metadata
 *   Since we have multiple tree blocks inside one page, we can't rely on page
 *   locking anymore, or we will have greatly reduced concurrency or even
 *   deadlocks (hold one tree lock while trying to lock another tree lock in
 *   the same page).
 *
 *   Thus for metadata locking, subpage support relies on io_tree locking only.
 *   This means a slightly higher tree locking latency.
 */

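/*
 * Example usage (illustrative only): with subpage enabled, callers mark byte
 * ranges instead of whole-page flags, using the helpers generated by
 * IMPLEMENT_BTRFS_PAGE_OPS() further down in this file, e.g.:
 *
 *	btrfs_page_set_uptodate(fs_info, page, start, fs_info->sectorsize);
 *	btrfs_page_clamp_clear_dirty(fs_info, page, start, len);
 *
 * When the page is not subpage (sectorsize == PAGE_SIZE), the same helpers
 * simply fall back to the regular full-page flag operations.
 */
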
bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct page *page)
{
	if (fs_info->sectorsize >= PAGE_SIZE)
		return false;

	/*
	 * Only data pages (either through DIO or compression) can have no
	 * mapping. And if page->mapping->host is a data inode, it's subpage,
	 * as we have already ruled out the sectorsize >= PAGE_SIZE case.
	 */
	if (!page->mapping || !page->mapping->host ||
	    is_data_inode(page->mapping->host))
		return true;

	/*
	 * Now the only remaining case is metadata, for which we only go the
	 * subpage routine if nodesize < PAGE_SIZE.
	 */
	if (fs_info->nodesize < PAGE_SIZE)
		return true;
	return false;
}

void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize)
{
	unsigned int cur = 0;
	unsigned int nr_bits;

	ASSERT(IS_ALIGNED(PAGE_SIZE, sectorsize));

	nr_bits = PAGE_SIZE / sectorsize;
	subpage_info->bitmap_nr_bits = nr_bits;

	subpage_info->uptodate_offset = cur;
	cur += nr_bits;

	subpage_info->dirty_offset = cur;
	cur += nr_bits;

	subpage_info->writeback_offset = cur;
	cur += nr_bits;

	subpage_info->ordered_offset = cur;
	cur += nr_bits;

	subpage_info->checked_offset = cur;
	cur += nr_bits;

	subpage_info->total_nr_bits = cur;
}

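/*
 * Worked example (illustrative): with a 4K sectorsize on a 64K page,
 * nr_bits = 64K / 4K = 16, so the packed bitmap layout built above is:
 *
 *   uptodate_offset  =  0
 *   dirty_offset     = 16
 *   writeback_offset = 32
 *   ordered_offset   = 48
 *   checked_offset   = 64
 *   total_nr_bits    = 80 (i.e. BITS_TO_LONGS(80) = 2 longs on 64-bit,
 *                          which is what btrfs_alloc_subpage() allocates)
 */
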
int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
			 struct page *page, enum btrfs_subpage_type type)
{
	struct btrfs_subpage *subpage;

	/*
	 * We have cases like a dummy extent buffer page, which is not mapped
	 * and doesn't need to be locked.
	 */
	if (page->mapping)
		ASSERT(PageLocked(page));

	/* Either not subpage, or the page already has private attached */
	if (!btrfs_is_subpage(fs_info, page) || PagePrivate(page))
		return 0;

	subpage = btrfs_alloc_subpage(fs_info, type);
	if (IS_ERR(subpage))
		return PTR_ERR(subpage);

	attach_page_private(page, subpage);
	return 0;
}

void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
			  struct page *page)
{
	struct btrfs_subpage *subpage;

	/* Either not subpage, or already detached */
	if (!btrfs_is_subpage(fs_info, page) || !PagePrivate(page))
		return;

	subpage = detach_page_private(page);
	ASSERT(subpage);
	btrfs_free_subpage(subpage);
}

struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
					  enum btrfs_subpage_type type)
{
	struct btrfs_subpage *ret;
	unsigned int real_size;

	ASSERT(fs_info->sectorsize < PAGE_SIZE);

	real_size = struct_size(ret, bitmaps,
			BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits));
	ret = kzalloc(real_size, GFP_NOFS);
	if (!ret)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&ret->lock);
	if (type == BTRFS_SUBPAGE_METADATA) {
		atomic_set(&ret->eb_refs, 0);
	} else {
		atomic_set(&ret->readers, 0);
		atomic_set(&ret->writers, 0);
	}
	return ret;
}

void btrfs_free_subpage(struct btrfs_subpage *subpage)
{
	kfree(subpage);
}

/*
 * Increase the eb_refs of the current subpage.
 *
 * This is important for eb allocation, to prevent a race with the last eb
 * freeing of the same page.
 * With eb_refs increased before the eb is inserted into the radix tree,
 * detach_extent_buffer_page() won't detach the page private while we're
 * still allocating the extent buffer.
 */
void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info,
			    struct page *page)
{
	struct btrfs_subpage *subpage;

	if (!btrfs_is_subpage(fs_info, page))
		return;

	ASSERT(PagePrivate(page) && page->mapping);
	lockdep_assert_held(&page->mapping->private_lock);

	subpage = (struct btrfs_subpage *)page->private;
	atomic_inc(&subpage->eb_refs);
}

void btrfs_page_dec_eb_refs(const struct btrfs_fs_info *fs_info,
			    struct page *page)
{
	struct btrfs_subpage *subpage;

	if (!btrfs_is_subpage(fs_info, page))
		return;

	ASSERT(PagePrivate(page) && page->mapping);
	lockdep_assert_held(&page->mapping->private_lock);

	subpage = (struct btrfs_subpage *)page->private;
	ASSERT(atomic_read(&subpage->eb_refs));
	atomic_dec(&subpage->eb_refs);
}

static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
				 struct page *page, u64 start, u32 len)
{
	/* Basic checks */
	ASSERT(PagePrivate(page) && page->private);
	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
	       IS_ALIGNED(len, fs_info->sectorsize));
	/*
	 * The range check only works for mapped pages, as we can still have
	 * unmapped pages like dummy extent buffer pages.
	 */
	if (page->mapping)
		ASSERT(page_offset(page) <= start &&
		       start + len <= page_offset(page) + PAGE_SIZE);
}

void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
				struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	const int nbits = len >> fs_info->sectorsize_bits;

	btrfs_subpage_assert(fs_info, page, start, len);

	atomic_add(nbits, &subpage->readers);
}

void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
			      struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	const int nbits = len >> fs_info->sectorsize_bits;
	bool is_data;
	bool last;

	btrfs_subpage_assert(fs_info, page, start, len);
	is_data = is_data_inode(page->mapping->host);
	ASSERT(atomic_read(&subpage->readers) >= nbits);
	last = atomic_sub_and_test(nbits, &subpage->readers);

	/*
	 * For data we need to unlock the page if the last read has finished.
	 *
	 * Please don't replace @last with an atomic_sub_and_test() call inside
	 * the if () condition, as we always want atomic_sub_and_test() to be
	 * executed.
	 */
	if (is_data && last)
		unlock_page(page);
}

static void btrfs_subpage_clamp_range(struct page *page, u64 *start, u32 *len)
{
	u64 orig_start = *start;
	u32 orig_len = *len;

	*start = max_t(u64, page_offset(page), orig_start);
	/*
	 * For certain call sites like btrfs_drop_pages(), we may have pages
	 * beyond the target range. In that case, just set @len to 0, the
	 * subpage helpers can handle @len == 0 without any problem.
	 */
	if (page_offset(page) >= orig_start + orig_len)
		*len = 0;
	else
		*len = min_t(u64, page_offset(page) + PAGE_SIZE,
			     orig_start + orig_len) - *start;
}

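/*
 * Worked example (illustrative): for a 64K page starting at file offset 64K,
 * clamping the range [60K, 76K) (start = 60K, len = 16K) gives start = 64K
 * and len = 12K. A range ending at or before the page start is clamped to
 * len == 0, which the subpage helpers accept.
 */
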
void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
				struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	const int nbits = (len >> fs_info->sectorsize_bits);
	int ret;

	btrfs_subpage_assert(fs_info, page, start, len);

	ASSERT(atomic_read(&subpage->readers) == 0);
	ret = atomic_add_return(nbits, &subpage->writers);
	ASSERT(ret == nbits);
}

bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
				       struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	const int nbits = (len >> fs_info->sectorsize_bits);

	btrfs_subpage_assert(fs_info, page, start, len);

	/*
	 * We have call sites passing @locked_page into
	 * extent_clear_unlock_delalloc() for the compression path.
	 *
	 * This @locked_page is locked by plain lock_page(), thus its
	 * subpage::writers is 0. Handle it in a special way.
	 */
	if (atomic_read(&subpage->writers) == 0)
		return true;

	ASSERT(atomic_read(&subpage->writers) >= nbits);
	return atomic_sub_and_test(nbits, &subpage->writers);
}

/*
 * Lock a page for delalloc page writeback.
 *
 * Return -EAGAIN if the page is not properly initialized.
 * Return 0 with the page locked, and the writer counter updated.
 *
 * Even with 0 returned, the page still needs an extra check to make sure
 * it's really the correct page, as the caller is using
 * filemap_get_folios_contig(), which can race with page invalidation.
 */
int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info,
				 struct page *page, u64 start, u32 len)
{
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {
		lock_page(page);
		return 0;
	}
	lock_page(page);
	if (!PagePrivate(page) || !page->private) {
		unlock_page(page);
		return -EAGAIN;
	}
	btrfs_subpage_clamp_range(page, &start, &len);
	btrfs_subpage_start_writer(fs_info, page, start, len);
	return 0;
}

void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
				struct page *page, u64 start, u32 len)
{
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page))
		return unlock_page(page);
	btrfs_subpage_clamp_range(page, &start, &len);
	if (btrfs_subpage_end_and_test_writer(fs_info, page, start, len))
		unlock_page(page);
}

#define subpage_calc_start_bit(fs_info, page, name, start, len)	\
({									\
	unsigned int start_bit;						\
									\
	btrfs_subpage_assert(fs_info, page, start, len);		\
	start_bit = offset_in_page(start) >> fs_info->sectorsize_bits;	\
	start_bit += fs_info->subpage_info->name##_offset;		\
	start_bit;							\
})

#define subpage_test_bitmap_all_set(fs_info, subpage, name)		\
	bitmap_test_range_all_set(subpage->bitmaps,			\
			fs_info->subpage_info->name##_offset,		\
			fs_info->subpage_info->bitmap_nr_bits)

#define subpage_test_bitmap_all_zero(fs_info, subpage, name)		\
	bitmap_test_range_all_zero(subpage->bitmaps,			\
			fs_info->subpage_info->name##_offset,		\
			fs_info->subpage_info->bitmap_nr_bits)

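/*
 * Worked example (illustrative): with a 4K sectorsize on a 64K page the
 * dirty bitmap starts at bit 16 (see btrfs_init_subpage_info()), so for
 * start = page_offset(page) + 8K and len = 8K:
 *
 *	subpage_calc_start_bit(fs_info, page, dirty, start, len)
 *		= (8K >> sectorsize_bits) + dirty_offset = 2 + 16 = 18
 *
 * and the set/clear helpers below then operate on len >> sectorsize_bits
 * (here 2) bits starting at bit 18.
 */
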
void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
				struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							uptodate, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate))
		SetPageUptodate(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
				  struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							uptodate, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	ClearPageUptodate(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
			     struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							dirty, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	spin_unlock_irqrestore(&subpage->lock, flags);
	set_page_dirty(page);
}

/*
 * Extra clear_and_test function for the subpage dirty bitmap.
 *
 * Return true if we cleared the last dirty bits, i.e. the whole dirty bitmap
 * is now clear.
 * Return false otherwise.
 *
 * NOTE: Callers should manually clear page dirty for the true case, as we
 * have extra handling for tree blocks.
 */
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
					struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							dirty, start, len);
	unsigned long flags;
	bool last = false;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty))
		last = true;
	spin_unlock_irqrestore(&subpage->lock, flags);
	return last;
}

void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info,
			       struct page *page, u64 start, u32 len)
{
	bool last;

	last = btrfs_subpage_clear_and_test_dirty(fs_info, page, start, len);
	if (last)
		clear_page_dirty_for_io(page);
}

void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
				 struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							writeback, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	set_page_writeback(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
				   struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							writeback, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) {
		ASSERT(PageWriteback(page));
		end_page_writeback(page);
	}
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
			       struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							ordered, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	SetPageOrdered(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
				 struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							ordered, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered))
		ClearPageOrdered(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
			       struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							checked, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_set(fs_info, subpage, checked))
		SetPageChecked(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
				 struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							checked, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	ClearPageChecked(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * Unlike set/clear, which depends on each page's status, for test all bits
 * are tested in the same way.
 */
#define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name)				\
bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info,	\
			       struct page *page, u64 start, u32 len)	\
{									\
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; \
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,	\
						name, start, len);	\
	unsigned long flags;						\
	bool ret;							\
									\
	spin_lock_irqsave(&subpage->lock, flags);			\
	ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit,	\
				len >> fs_info->sectorsize_bits);	\
	spin_unlock_irqrestore(&subpage->lock, flags);			\
	return ret;							\
}
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);

/*
 * Note that, in selftests (extent-io-tests), we can have empty fs_info passed
 * in. We only test sectorsize == PAGE_SIZE cases so far, thus we can fall
 * back to the regular sectorsize branch.
 */
#define IMPLEMENT_BTRFS_PAGE_OPS(name, set_page_func, clear_page_func,	\
				 test_page_func)			\
void btrfs_page_set_##name(const struct btrfs_fs_info *fs_info,	\
			   struct page *page, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {	\
		set_page_func(page);					\
		return;							\
	}								\
	btrfs_subpage_set_##name(fs_info, page, start, len);		\
}									\
void btrfs_page_clear_##name(const struct btrfs_fs_info *fs_info,	\
			     struct page *page, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {	\
		clear_page_func(page);					\
		return;							\
	}								\
	btrfs_subpage_clear_##name(fs_info, page, start, len);		\
}									\
bool btrfs_page_test_##name(const struct btrfs_fs_info *fs_info,	\
			    struct page *page, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page))	\
		return test_page_func(page);				\
	return btrfs_subpage_test_##name(fs_info, page, start, len);	\
}									\
void btrfs_page_clamp_set_##name(const struct btrfs_fs_info *fs_info,	\
				 struct page *page, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {	\
		set_page_func(page);					\
		return;							\
	}								\
	btrfs_subpage_clamp_range(page, &start, &len);			\
	btrfs_subpage_set_##name(fs_info, page, start, len);		\
}									\
void btrfs_page_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
				   struct page *page, u64 start, u32 len) \
{									\
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {	\
		clear_page_func(page);					\
		return;							\
	}								\
	btrfs_subpage_clamp_range(page, &start, &len);			\
	btrfs_subpage_clear_##name(fs_info, page, start, len);		\
}									\
bool btrfs_page_clamp_test_##name(const struct btrfs_fs_info *fs_info,	\
				  struct page *page, u64 start, u32 len) \
{									\
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page))	\
		return test_page_func(page);				\
	btrfs_subpage_clamp_range(page, &start, &len);			\
	return btrfs_subpage_test_##name(fs_info, page, start, len);	\
}
IMPLEMENT_BTRFS_PAGE_OPS(uptodate, SetPageUptodate, ClearPageUptodate,
			 PageUptodate);
IMPLEMENT_BTRFS_PAGE_OPS(dirty, set_page_dirty, clear_page_dirty_for_io,
			 PageDirty);
IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback,
			 PageWriteback);
IMPLEMENT_BTRFS_PAGE_OPS(ordered, SetPageOrdered, ClearPageOrdered,
			 PageOrdered);
IMPLEMENT_BTRFS_PAGE_OPS(checked, SetPageChecked, ClearPageChecked, PageChecked);
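
/*
 * For example, IMPLEMENT_BTRFS_PAGE_OPS(dirty, ...) above expands to
 * btrfs_page_set_dirty(), btrfs_page_clear_dirty(), btrfs_page_test_dirty()
 * and their clamp_* variants, each falling back to the plain page helper
 * (set_page_dirty() etc.) when fs_info is NULL or the page is not subpage.
 */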

/*
 * Make sure that not only the page dirty bit is cleared, but also the subpage
 * dirty bit is cleared.
 */
void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
				 struct page *page)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;

	if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
		return;

	ASSERT(!PageDirty(page));
	if (!btrfs_is_subpage(fs_info, page))
		return;

	ASSERT(PagePrivate(page) && page->private);
	ASSERT(subpage_test_bitmap_all_zero(fs_info, subpage, dirty));
}

/*
 * Handle different locked pages with different page sizes:
 *
 * - Page locked by plain lock_page()
 *   It should not have any subpage::writers count.
 *   Can be unlocked by unlock_page().
 *   This is the most common locked page for __extent_writepage() called
 *   inside extent_write_cache_pages().
 *   Rarer cases include the @locked_page from extent_write_locked_range().
 *
 * - Page locked by lock_delalloc_pages()
 *   There is only one caller, all pages except @locked_page for
 *   extent_write_locked_range().
 *   In this case, we have to call the subpage helper to handle the case.
 */
void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
			      u64 start, u32 len)
{
	struct btrfs_subpage *subpage;

	ASSERT(PageLocked(page));
	/* For the non-subpage case, we just unlock the page */
	if (!btrfs_is_subpage(fs_info, page))
		return unlock_page(page);

	ASSERT(PagePrivate(page) && page->private);
	subpage = (struct btrfs_subpage *)page->private;

	/*
	 * For the subpage case, there are two types of locked page: with or
	 * without a writers count.
	 *
	 * Since we own the page lock, no one else could touch subpage::writers
	 * and we are safe to do several atomic operations without spinlock.
	 */
	if (atomic_read(&subpage->writers) == 0)
		/* No writers, locked by plain lock_page() */
		return unlock_page(page);

	/* Have writers, use the proper subpage helper to end it */
	btrfs_page_end_writer_lock(fs_info, page, start, len);
}

#define GET_SUBPAGE_BITMAP(subpage, subpage_info, name, dst)		\
	bitmap_cut(dst, subpage->bitmaps, 0,				\
		   subpage_info->name##_offset,				\
		   subpage_info->bitmap_nr_bits)

void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
				      struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage_info *subpage_info = fs_info->subpage_info;
	struct btrfs_subpage *subpage;
	unsigned long uptodate_bitmap;
	unsigned long dirty_bitmap;
	unsigned long writeback_bitmap;
	unsigned long ordered_bitmap;
	unsigned long checked_bitmap;
	unsigned long flags;

	ASSERT(PagePrivate(page) && page->private);
	ASSERT(subpage_info);
	subpage = (struct btrfs_subpage *)page->private;

	spin_lock_irqsave(&subpage->lock, flags);
	GET_SUBPAGE_BITMAP(subpage, subpage_info, uptodate, &uptodate_bitmap);
	GET_SUBPAGE_BITMAP(subpage, subpage_info, dirty, &dirty_bitmap);
	GET_SUBPAGE_BITMAP(subpage, subpage_info, writeback, &writeback_bitmap);
	GET_SUBPAGE_BITMAP(subpage, subpage_info, ordered, &ordered_bitmap);
	GET_SUBPAGE_BITMAP(subpage, subpage_info, checked, &checked_bitmap);
	spin_unlock_irqrestore(&subpage->lock, flags);

	dump_page(page, "btrfs subpage dump");
	btrfs_warn(fs_info,
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
		   start, len, page_offset(page),
		   subpage_info->bitmap_nr_bits, &uptodate_bitmap,
		   subpage_info->bitmap_nr_bits, &dirty_bitmap,
		   subpage_info->bitmap_nr_bits, &writeback_bitmap,
		   subpage_info->bitmap_nr_bits, &ordered_bitmap,
		   subpage_info->bitmap_nr_bits, &checked_bitmap);
}