Commit | Line | Data |
---|---|---|
cac06d84 QW |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include <linux/slab.h> | |
9b569ea0 | 4 | #include "messages.h" |
cac06d84 QW |
5 | #include "ctree.h" |
6 | #include "subpage.h" | |
3d078efa | 7 | #include "btrfs_inode.h" |
cac06d84 | 8 | |
894d1378 QW |
9 | /* |
10 | * Subpage (sectorsize < PAGE_SIZE) support overview: | |
11 | * | |
12 | * Limitations: | |
13 | * | |
14 | * - Only support 64K page size for now | |
15 | * This is to make metadata handling easier, as a 64K page ensures that | 
16 | * any valid nodesize fits inside one page, thus we don't need to handle | 
17 | * cases where a tree block crosses several pages. | |
18 | * | |
19 | * - Only metadata read-write for now | |
20 | * The data read-write part is in development. | |
21 | * | |
22 | * - Metadata can't cross 64K page boundary | |
23 | * btrfs-progs and the kernel have ensured this for a while, thus only | 
24 | * ancient filesystems could have such a problem. For such a case, do a | 
25 | * graceful rejection. | 
26 | * | |
27 | * Special behavior: | |
28 | * | |
29 | * - Metadata | |
30 | * Metadata read is fully supported. | |
31 | * Reading one tree block only triggers the read for the needed range; | 
32 | * other unrelated ranges in the same page are not touched. | 
33 | * | |
34 | * Metadata write support is partial. | |
35 | * The writeback is still for the full page, but we will only submit | |
36 | * the dirty extent buffers in the page. | |
37 | * | |
38 | * This means, if we have a metadata page like this: | |
39 | * | |
40 | * Page offset | |
41 | * 0 16K 32K 48K 64K | |
42 | * |/////////| |///////////| | |
43 | * \- Tree block A \- Tree block B | |
44 | * | |
45 | * Even if we just want to write back tree block A, we will also write back | 
46 | * tree block B if it's also dirty. | 
47 | * | 
48 | * This may cause extra metadata writeback, which results in more COW. | 
49 | * | |
50 | * Implementation: | |
51 | * | |
52 | * - Common | |
53 | * Both metadata and data will use a new structure, btrfs_subpage, to | |
54 | * record the status of each sector inside a page. This provides the extra | |
55 | * granularity needed. | |
56 | * | |
57 | * - Metadata | |
58 | * Since we have multiple tree blocks inside one page, we can't rely on page | |
59 | * locking anymore, or we would greatly reduce concurrency or even cause | 
60 | * deadlocks (holding one tree block's lock while trying to lock another | 
61 | * tree block in the same page). | 
62 | * | |
63 | * Thus for metadata locking, subpage support relies on io_tree locking only. | |
64 | * This means a slightly higher tree locking latency. | |
65 | */ | |
66 | ||
fbca46eb QW |
67 | bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct page *page) |
68 | { | |
69 | if (fs_info->sectorsize >= PAGE_SIZE) | |
70 | return false; | |
71 | ||
72 | /* | |
73 | * Only data pages (either through DIO or compression) can have no | |
74 | * mapping. And if page->mapping->host is a data inode, it's a subpage case, | 
75 | * as we have already ruled out the sectorsize >= PAGE_SIZE case. | 
76 | */ | |
77 | if (!page->mapping || !page->mapping->host || | |
78 | is_data_inode(page->mapping->host)) | |
79 | return true; | |
80 | ||
81 | /* | |
82 | * Now the only remaining case is metadata, which goes through the subpage | 
83 | * routine only if nodesize < PAGE_SIZE. | 
84 | */ | |
85 | if (fs_info->nodesize < PAGE_SIZE) | |
86 | return true; | |
87 | return false; | |
88 | } | |
89 | ||
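The decision above can be illustrated with a small standalone sketch: is_subpage_decision() below is a hypothetical userspace stand-in (not kernel API) that mirrors the same ordering of checks with plain parameters instead of fs_info and page.

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for btrfs_is_subpage(); not kernel code. */
static bool is_subpage_decision(unsigned int sectorsize, unsigned int nodesize,
				unsigned int page_size, bool has_mapping,
				bool is_data)
{
	if (sectorsize >= page_size)
		return false;

	/* Unmapped pages and data inodes go through the subpage routine. */
	if (!has_mapping || is_data)
		return true;

	/* Metadata only goes subpage when nodesize < page size. */
	return nodesize < page_size;
}

int main(void)
{
	/* 4K sectors, 16K nodes, 64K page: both data and metadata are subpage. */
	printf("data: %d\n", is_subpage_decision(4096, 16384, 65536, true, true));
	printf("meta: %d\n", is_subpage_decision(4096, 16384, 65536, true, false));
	/* 4K sectors on a 4K page: never subpage. */
	printf("4k page: %d\n", is_subpage_decision(4096, 16384, 4096, true, true));
	return 0;
}
```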
8481dd80 QW |
90 | void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize) |
91 | { | |
92 | unsigned int cur = 0; | |
93 | unsigned int nr_bits; | |
94 | ||
95 | ASSERT(IS_ALIGNED(PAGE_SIZE, sectorsize)); | |
96 | ||
97 | nr_bits = PAGE_SIZE / sectorsize; | |
98 | subpage_info->bitmap_nr_bits = nr_bits; | |
99 | ||
100 | subpage_info->uptodate_offset = cur; | |
101 | cur += nr_bits; | |
102 | ||
103 | subpage_info->error_offset = cur; | |
104 | cur += nr_bits; | |
105 | ||
106 | subpage_info->dirty_offset = cur; | |
107 | cur += nr_bits; | |
108 | ||
109 | subpage_info->writeback_offset = cur; | |
110 | cur += nr_bits; | |
111 | ||
112 | subpage_info->ordered_offset = cur; | |
113 | cur += nr_bits; | |
114 | ||
e4f94347 QW |
115 | subpage_info->checked_offset = cur; |
116 | cur += nr_bits; | |
117 | ||
8481dd80 QW |
118 | subpage_info->total_nr_bits = cur; |
119 | } | |
120 | ||
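As a worked example of the layout computed by btrfs_init_subpage_info(), the standalone sketch below redoes the same arithmetic for an assumed 64K page with 4K sectors: each status gets 16 bits, giving uptodate at 0, error at 16, dirty at 32, writeback at 48, ordered at 64, checked at 80, and 96 bits in total. The struct and function names here are illustrative stand-ins, not the kernel definitions.

```c
#include <stdio.h>

/* Illustrative stand-in for the per-status bit offsets computed above. */
struct subpage_layout {
	unsigned int bitmap_nr_bits;
	unsigned int uptodate_offset;
	unsigned int error_offset;
	unsigned int dirty_offset;
	unsigned int writeback_offset;
	unsigned int ordered_offset;
	unsigned int checked_offset;
	unsigned int total_nr_bits;
};

static void init_layout(struct subpage_layout *l, unsigned int page_size,
			unsigned int sectorsize)
{
	unsigned int nr_bits = page_size / sectorsize;
	unsigned int cur = 0;

	l->bitmap_nr_bits = nr_bits;
	l->uptodate_offset = cur;  cur += nr_bits;
	l->error_offset = cur;     cur += nr_bits;
	l->dirty_offset = cur;     cur += nr_bits;
	l->writeback_offset = cur; cur += nr_bits;
	l->ordered_offset = cur;   cur += nr_bits;
	l->checked_offset = cur;   cur += nr_bits;
	l->total_nr_bits = cur;
}

int main(void)
{
	struct subpage_layout l;

	/* 64K page, 4K sectors: 16 bits per status, 96 bits in total. */
	init_layout(&l, 64 * 1024, 4 * 1024);
	printf("bits per status: %u\n", l.bitmap_nr_bits);	/* 16 */
	printf("dirty range: [%u, %u)\n", l.dirty_offset,
	       l.dirty_offset + l.bitmap_nr_bits);		/* [32, 48) */
	printf("total bits: %u\n", l.total_nr_bits);		/* 96 */
	return 0;
}
```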
cac06d84 QW |
121 | int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, |
122 | struct page *page, enum btrfs_subpage_type type) | |
123 | { | |
651fb419 | 124 | struct btrfs_subpage *subpage; |
cac06d84 QW |
125 | |
126 | /* | |
143823cf | 127 | * We have cases like a dummy extent buffer page, which is not mapped |
cac06d84 QW |
128 | * and doesn't need to be locked. |
129 | */ | |
130 | if (page->mapping) | |
131 | ASSERT(PageLocked(page)); | |
651fb419 | 132 | |
cac06d84 | 133 | /* Either not subpage, or the page already has private attached */ |
fbca46eb | 134 | if (!btrfs_is_subpage(fs_info, page) || PagePrivate(page)) |
cac06d84 QW |
135 | return 0; |
136 | ||
651fb419 QW |
137 | subpage = btrfs_alloc_subpage(fs_info, type); |
138 | if (IS_ERR(subpage)) | |
139 | return PTR_ERR(subpage); | |
140 | ||
cac06d84 QW |
141 | attach_page_private(page, subpage); |
142 | return 0; | |
143 | } | |
144 | ||
145 | void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, | |
146 | struct page *page) | |
147 | { | |
148 | struct btrfs_subpage *subpage; | |
149 | ||
150 | /* Either not subpage, or already detached */ | |
fbca46eb | 151 | if (!btrfs_is_subpage(fs_info, page) || !PagePrivate(page)) |
cac06d84 QW |
152 | return; |
153 | ||
0d031dc4 | 154 | subpage = detach_page_private(page); |
cac06d84 | 155 | ASSERT(subpage); |
760f991f QW |
156 | btrfs_free_subpage(subpage); |
157 | } | |
158 | ||
651fb419 QW |
159 | struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, |
160 | enum btrfs_subpage_type type) | |
760f991f | 161 | { |
651fb419 | 162 | struct btrfs_subpage *ret; |
72a69cd0 | 163 | unsigned int real_size; |
651fb419 | 164 | |
fdf250db | 165 | ASSERT(fs_info->sectorsize < PAGE_SIZE); |
760f991f | 166 | |
72a69cd0 QW |
167 | real_size = struct_size(ret, bitmaps, |
168 | BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits)); | |
169 | ret = kzalloc(real_size, GFP_NOFS); | |
651fb419 QW |
170 | if (!ret) |
171 | return ERR_PTR(-ENOMEM); | |
172 | ||
173 | spin_lock_init(&ret->lock); | |
1e1de387 | 174 | if (type == BTRFS_SUBPAGE_METADATA) { |
651fb419 | 175 | atomic_set(&ret->eb_refs, 0); |
1e1de387 | 176 | } else { |
651fb419 QW |
177 | atomic_set(&ret->readers, 0); |
178 | atomic_set(&ret->writers, 0); | |
1e1de387 | 179 | } |
651fb419 | 180 | return ret; |
760f991f QW |
181 | } |
182 | ||
183 | void btrfs_free_subpage(struct btrfs_subpage *subpage) | |
184 | { | |
cac06d84 QW |
185 | kfree(subpage); |
186 | } | |
8ff8466d QW |
187 | |
188 | /* | |
189 | * Increase the eb_refs of the current subpage. | 
190 | * | 
191 | * This is important for eb allocation, to prevent a race with the freeing | 
192 | * of the last eb in the same page. | 
193 | * With eb_refs increased before the eb is inserted into the radix tree, | 
194 | * detach_extent_buffer_page() won't detach the page private while we're | 
195 | * still allocating the extent buffer. | 
196 | */ | |
197 | void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info, | |
198 | struct page *page) | |
199 | { | |
200 | struct btrfs_subpage *subpage; | |
201 | ||
fbca46eb | 202 | if (!btrfs_is_subpage(fs_info, page)) |
8ff8466d QW |
203 | return; |
204 | ||
205 | ASSERT(PagePrivate(page) && page->mapping); | |
206 | lockdep_assert_held(&page->mapping->private_lock); | |
207 | ||
208 | subpage = (struct btrfs_subpage *)page->private; | |
209 | atomic_inc(&subpage->eb_refs); | |
210 | } | |
211 | ||
212 | void btrfs_page_dec_eb_refs(const struct btrfs_fs_info *fs_info, | |
213 | struct page *page) | |
214 | { | |
215 | struct btrfs_subpage *subpage; | |
216 | ||
fbca46eb | 217 | if (!btrfs_is_subpage(fs_info, page)) |
8ff8466d QW |
218 | return; |
219 | ||
220 | ASSERT(PagePrivate(page) && page->mapping); | |
221 | lockdep_assert_held(&page->mapping->private_lock); | |
222 | ||
223 | subpage = (struct btrfs_subpage *)page->private; | |
224 | ASSERT(atomic_read(&subpage->eb_refs)); | |
225 | atomic_dec(&subpage->eb_refs); | |
226 | } | |
a1d767c1 | 227 | |
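The ordering requirement above (take the reference before the eb becomes visible, detach only when no references remain) can be sketched generically. The snippet below is a simplified userspace analogue using C11 atomics; attach_eb(), release_eb() and try_detach_private() are invented names, not btrfs functions.

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Simplified analogue of subpage::eb_refs protecting page private data. */
static atomic_int eb_refs = 0;

static void attach_eb(void)
{
	/* Take the reference BEFORE the eb becomes visible to other threads. */
	atomic_fetch_add(&eb_refs, 1);
	/* ... insert the eb into the lookup structure here ... */
}

static bool try_detach_private(void)
{
	/* The release path may only detach once no eb holds a reference. */
	return atomic_load(&eb_refs) == 0;
}

static void release_eb(void)
{
	/* ... remove the eb from the lookup structure here ... */
	atomic_fetch_sub(&eb_refs, 1);
}

int main(void)
{
	attach_eb();
	printf("detach while eb alive: %d\n", try_detach_private());	/* 0 */
	release_eb();
	printf("detach after release:  %d\n", try_detach_private());	/* 1 */
	return 0;
}
```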
92082d40 | 228 | static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, |
a1d767c1 QW |
229 | struct page *page, u64 start, u32 len) |
230 | { | |
a1d767c1 QW |
231 | /* Basic checks */ |
232 | ASSERT(PagePrivate(page) && page->private); | |
233 | ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && | |
234 | IS_ALIGNED(len, fs_info->sectorsize)); | |
a1d767c1 QW |
235 | /* |
236 | * The range check only works for mapped pages; we can still have | 
237 | * unmapped pages like dummy extent buffer pages. | 
238 | */ | |
239 | if (page->mapping) | |
240 | ASSERT(page_offset(page) <= start && | |
241 | start + len <= page_offset(page) + PAGE_SIZE); | |
92082d40 QW |
242 | } |
243 | ||
244 | void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info, | |
245 | struct page *page, u64 start, u32 len) | |
246 | { | |
247 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
248 | const int nbits = len >> fs_info->sectorsize_bits; | |
92082d40 QW |
249 | |
250 | btrfs_subpage_assert(fs_info, page, start, len); | |
251 | ||
3d078efa | 252 | atomic_add(nbits, &subpage->readers); |
92082d40 QW |
253 | } |
254 | ||
255 | void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info, | |
256 | struct page *page, u64 start, u32 len) | |
257 | { | |
258 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
259 | const int nbits = len >> fs_info->sectorsize_bits; | |
3d078efa QW |
260 | bool is_data; |
261 | bool last; | |
92082d40 QW |
262 | |
263 | btrfs_subpage_assert(fs_info, page, start, len); | |
3d078efa | 264 | is_data = is_data_inode(page->mapping->host); |
92082d40 | 265 | ASSERT(atomic_read(&subpage->readers) >= nbits); |
3d078efa QW |
266 | last = atomic_sub_and_test(nbits, &subpage->readers); |
267 | ||
268 | /* | |
269 | * For data we need to unlock the page if the last read has finished. | |
270 | * | |
271 | * Please don't replace @last with an atomic_sub_and_test() call inside the | 
272 | * if () condition, as we want the atomic_sub_and_test() to always be | 
273 | * executed so the reader count is always decremented. | 
274 | */ | |
275 | if (is_data && last) | |
92082d40 QW |
276 | unlock_page(page); |
277 | } | |
278 | ||
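The comment above insists that the reader count is decremented unconditionally, while only the data path unlocks on the last reader. Below is a minimal userspace analogue using C11 atomics, with invented names and a printf() standing in for unlock_page():

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int readers = 0;

/* Analogue of btrfs_subpage_end_reader(): always drop the reader count. */
static void end_reader(int nbits, bool is_data)
{
	/* Subtract first, so metadata readers are accounted for too... */
	bool last = atomic_fetch_sub(&readers, nbits) == nbits;

	/* ...and only the data path unlocks the page on the last reader. */
	if (is_data && last)
		printf("unlock_page()\n");
}

int main(void)
{
	atomic_store(&readers, 4);
	end_reader(2, false);	/* metadata: count drops, no unlock */
	end_reader(2, true);	/* data: last reader, would unlock */
	return 0;
}
```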
1e1de387 QW |
279 | static void btrfs_subpage_clamp_range(struct page *page, u64 *start, u32 *len) |
280 | { | |
281 | u64 orig_start = *start; | |
282 | u32 orig_len = *len; | |
283 | ||
284 | *start = max_t(u64, page_offset(page), orig_start); | |
e4f94347 QW |
285 | /* |
286 | * For certain call sites like btrfs_drop_pages(), we may have pages | |
287 | * beyond the target range. In that case, just set @len to 0; the subpage | 
288 | * helpers can handle @len == 0 without any problem. | 
289 | */ | |
290 | if (page_offset(page) >= orig_start + orig_len) | |
291 | *len = 0; | |
292 | else | |
293 | *len = min_t(u64, page_offset(page) + PAGE_SIZE, | |
294 | orig_start + orig_len) - *start; | |
1e1de387 QW |
295 | } |
296 | ||
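A small worked example of the clamping logic above, with the page offset passed explicitly and invented names (EX_PAGE_SIZE, clamp_range()): for a page covering [64K, 128K) and a 12K range starting at 60K, the result is start = 64K and len = 8K; a range that ends before the page would be clamped to len = 0.

```c
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SIZE 65536ULL	/* illustrative 64K page */

/* Mirrors btrfs_subpage_clamp_range(), with the page offset as a parameter. */
static void clamp_range(uint64_t page_off, uint64_t *start, uint32_t *len)
{
	uint64_t orig_start = *start;
	uint32_t orig_len = *len;
	uint64_t page_end = page_off + EX_PAGE_SIZE;
	uint64_t orig_end = orig_start + orig_len;

	*start = orig_start > page_off ? orig_start : page_off;
	if (page_off >= orig_end)
		*len = 0;
	else
		*len = (page_end < orig_end ? page_end : orig_end) - *start;
}

int main(void)
{
	uint64_t start = 60 * 1024;
	uint32_t len = 12 * 1024;

	clamp_range(64 * 1024, &start, &len);
	/* Prints start=65536 len=8192 */
	printf("start=%llu len=%u\n", (unsigned long long)start, len);
	return 0;
}
```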
297 | void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info, | |
298 | struct page *page, u64 start, u32 len) | |
299 | { | |
300 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
301 | const int nbits = (len >> fs_info->sectorsize_bits); | |
302 | int ret; | |
303 | ||
304 | btrfs_subpage_assert(fs_info, page, start, len); | |
305 | ||
306 | ASSERT(atomic_read(&subpage->readers) == 0); | |
307 | ret = atomic_add_return(nbits, &subpage->writers); | |
308 | ASSERT(ret == nbits); | |
309 | } | |
310 | ||
311 | bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info, | |
312 | struct page *page, u64 start, u32 len) | |
313 | { | |
314 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
315 | const int nbits = (len >> fs_info->sectorsize_bits); | |
316 | ||
317 | btrfs_subpage_assert(fs_info, page, start, len); | |
318 | ||
164674a7 QW |
319 | /* |
320 | * We have call sites passing @locked_page into | 
321 | * extent_clear_unlock_delalloc() for the compression path. | 
322 | * | 
323 | * That @locked_page is locked by plain lock_page(), thus its | 
324 | * subpage::writers is 0. Handle such pages in a special way. | 
325 | */ | |
326 | if (atomic_read(&subpage->writers) == 0) | |
327 | return true; | |
328 | ||
1e1de387 QW |
329 | ASSERT(atomic_read(&subpage->writers) >= nbits); |
330 | return atomic_sub_and_test(nbits, &subpage->writers); | |
331 | } | |
332 | ||
333 | /* | |
334 | * Lock a page for delalloc page writeback. | |
335 | * | |
336 | * Return -EAGAIN if the page is not properly initialized. | |
337 | * Return 0 with the page locked, and writer counter updated. | |
338 | * | |
339 | * Even with 0 returned, the page still needs extra checks to make sure | 
340 | * it's really the correct page, as the caller is using | 
47d55419 | 341 | * filemap_get_folios_contig(), which can race with page invalidation. | 
1e1de387 QW |
342 | */ |
343 | int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info, | |
344 | struct page *page, u64 start, u32 len) | |
345 | { | |
fbca46eb | 346 | if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) { |
1e1de387 QW |
347 | lock_page(page); |
348 | return 0; | |
349 | } | |
350 | lock_page(page); | |
351 | if (!PagePrivate(page) || !page->private) { | |
352 | unlock_page(page); | |
353 | return -EAGAIN; | |
354 | } | |
355 | btrfs_subpage_clamp_range(page, &start, &len); | |
356 | btrfs_subpage_start_writer(fs_info, page, start, len); | |
357 | return 0; | |
358 | } | |
359 | ||
360 | void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info, | |
361 | struct page *page, u64 start, u32 len) | |
362 | { | |
fbca46eb | 363 | if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) |
1e1de387 QW |
364 | return unlock_page(page); |
365 | btrfs_subpage_clamp_range(page, &start, &len); | |
366 | if (btrfs_subpage_end_and_test_writer(fs_info, page, start, len)) | |
367 | unlock_page(page); | |
368 | } | |
369 | ||
72a69cd0 QW |
370 | static bool bitmap_test_range_all_set(unsigned long *addr, unsigned int start, |
371 | unsigned int nbits) | |
92082d40 | 372 | { |
72a69cd0 | 373 | unsigned int found_zero; |
92082d40 | 374 | |
72a69cd0 QW |
375 | found_zero = find_next_zero_bit(addr, start + nbits, start); |
376 | if (found_zero == start + nbits) | |
377 | return true; | |
378 | return false; | |
379 | } | |
92082d40 | 380 | |
72a69cd0 QW |
381 | static bool bitmap_test_range_all_zero(unsigned long *addr, unsigned int start, |
382 | unsigned int nbits) | |
383 | { | |
384 | unsigned int found_set; | |
385 | ||
386 | found_set = find_next_bit(addr, start + nbits, start); | |
387 | if (found_set == start + nbits) | |
388 | return true; | |
389 | return false; | |
a1d767c1 QW |
390 | } |
391 | ||
72a69cd0 QW |
392 | #define subpage_calc_start_bit(fs_info, page, name, start, len) \ |
393 | ({ \ | |
394 | unsigned int start_bit; \ | |
395 | \ | |
396 | btrfs_subpage_assert(fs_info, page, start, len); \ | |
397 | start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \ | |
398 | start_bit += fs_info->subpage_info->name##_offset; \ | |
399 | start_bit; \ | |
400 | }) | |
401 | ||
402 | #define subpage_test_bitmap_all_set(fs_info, subpage, name) \ | |
403 | bitmap_test_range_all_set(subpage->bitmaps, \ | |
404 | fs_info->subpage_info->name##_offset, \ | |
405 | fs_info->subpage_info->bitmap_nr_bits) | |
406 | ||
407 | #define subpage_test_bitmap_all_zero(fs_info, subpage, name) \ | |
408 | bitmap_test_range_all_zero(subpage->bitmaps, \ | |
409 | fs_info->subpage_info->name##_offset, \ | |
410 | fs_info->subpage_info->bitmap_nr_bits) | |
411 | ||
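Tying the bitmap layout and the macro above together, a write to bytes [16K, 24K) of a page, with 4K sectors on a 64K page, lands in the dirty range at start_bit = 32 + (16K >> 12) = 36 and covers len >> sectorsize_bits = 2 bits. The standalone sketch below (illustrative constants only) redoes that arithmetic.

```c
#include <stdio.h>

int main(void)
{
	const unsigned int sectorsize_bits = 12;	/* 4K sectors */
	const unsigned int bitmap_nr_bits = 16;		/* 64K / 4K */
	const unsigned int dirty_offset = 2 * bitmap_nr_bits; /* after uptodate + error */

	unsigned long long start = 16 * 1024;	/* byte offset inside the page */
	unsigned int len = 8 * 1024;

	/* Same arithmetic as subpage_calc_start_bit() for the dirty range. */
	unsigned int start_bit = (unsigned int)(start >> sectorsize_bits) + dirty_offset;
	unsigned int nbits = len >> sectorsize_bits;

	printf("dirty bits [%u, %u)\n", start_bit, start_bit + nbits);	/* [36, 38) */
	return 0;
}
```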
a1d767c1 QW |
412 | void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info, |
413 | struct page *page, u64 start, u32 len) | |
414 | { | |
415 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
72a69cd0 QW |
416 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, |
417 | uptodate, start, len); | |
a1d767c1 QW |
418 | unsigned long flags; |
419 | ||
420 | spin_lock_irqsave(&subpage->lock, flags); | |
72a69cd0 QW |
421 | bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); |
422 | if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate)) | |
a1d767c1 QW |
423 | SetPageUptodate(page); |
424 | spin_unlock_irqrestore(&subpage->lock, flags); | |
425 | } | |
426 | ||
427 | void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info, | |
428 | struct page *page, u64 start, u32 len) | |
429 | { | |
430 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
72a69cd0 QW |
431 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, |
432 | uptodate, start, len); | |
a1d767c1 QW |
433 | unsigned long flags; |
434 | ||
435 | spin_lock_irqsave(&subpage->lock, flags); | |
72a69cd0 | 436 | bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); |
a1d767c1 QW |
437 | ClearPageUptodate(page); |
438 | spin_unlock_irqrestore(&subpage->lock, flags); | |
439 | } | |
440 | ||
03a816b3 QW |
441 | void btrfs_subpage_set_error(const struct btrfs_fs_info *fs_info, |
442 | struct page *page, u64 start, u32 len) | |
443 | { | |
444 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
72a69cd0 QW |
445 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, |
446 | error, start, len); | |
03a816b3 QW |
447 | unsigned long flags; |
448 | ||
449 | spin_lock_irqsave(&subpage->lock, flags); | |
72a69cd0 | 450 | bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); |
03a816b3 QW |
451 | SetPageError(page); |
452 | spin_unlock_irqrestore(&subpage->lock, flags); | |
453 | } | |
454 | ||
455 | void btrfs_subpage_clear_error(const struct btrfs_fs_info *fs_info, | |
456 | struct page *page, u64 start, u32 len) | |
457 | { | |
458 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
72a69cd0 QW |
459 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, |
460 | error, start, len); | |
03a816b3 QW |
461 | unsigned long flags; |
462 | ||
463 | spin_lock_irqsave(&subpage->lock, flags); | |
72a69cd0 QW |
464 | bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); |
465 | if (subpage_test_bitmap_all_zero(fs_info, subpage, error)) | |
03a816b3 QW |
466 | ClearPageError(page); |
467 | spin_unlock_irqrestore(&subpage->lock, flags); | |
468 | } | |
469 | ||
d8a5713e QW |
470 | void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info, |
471 | struct page *page, u64 start, u32 len) | |
472 | { | |
473 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
72a69cd0 QW |
474 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, |
475 | dirty, start, len); | |
d8a5713e QW |
476 | unsigned long flags; |
477 | ||
478 | spin_lock_irqsave(&subpage->lock, flags); | |
72a69cd0 | 479 | bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); |
d8a5713e QW |
480 | spin_unlock_irqrestore(&subpage->lock, flags); |
481 | set_page_dirty(page); | |
482 | } | |
483 | ||
484 | /* | |
485 | * Extra clear_and_test function for subpage dirty bitmap. | |
486 | * | |
487 | * Return true if we cleared the last dirty bits in the dirty_bitmap (i.e. | 
488 | * the whole dirty_bitmap is now clear). | 
489 | * Return false otherwise. | 
490 | * | 
491 | * NOTE: Callers should manually clear the page dirty flag for the true case, | 
492 | * as we have extra handling for tree blocks. | 
493 | */ | |
494 | bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info, | |
495 | struct page *page, u64 start, u32 len) | |
496 | { | |
497 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
72a69cd0 QW |
498 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, |
499 | dirty, start, len); | |
d8a5713e QW |
500 | unsigned long flags; |
501 | bool last = false; | |
502 | ||
503 | spin_lock_irqsave(&subpage->lock, flags); | |
72a69cd0 QW |
504 | bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); |
505 | if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty)) | |
d8a5713e QW |
506 | last = true; |
507 | spin_unlock_irqrestore(&subpage->lock, flags); | |
508 | return last; | |
509 | } | |
510 | ||
511 | void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info, | |
512 | struct page *page, u64 start, u32 len) | |
513 | { | |
514 | bool last; | |
515 | ||
516 | last = btrfs_subpage_clear_and_test_dirty(fs_info, page, start, len); | |
517 | if (last) | |
518 | clear_page_dirty_for_io(page); | |
519 | } | |
520 | ||
3470da3b QW |
521 | void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, |
522 | struct page *page, u64 start, u32 len) | |
523 | { | |
524 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
72a69cd0 QW |
525 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, |
526 | writeback, start, len); | |
3470da3b QW |
527 | unsigned long flags; |
528 | ||
529 | spin_lock_irqsave(&subpage->lock, flags); | |
72a69cd0 | 530 | bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); |
3470da3b QW |
531 | set_page_writeback(page); |
532 | spin_unlock_irqrestore(&subpage->lock, flags); | |
533 | } | |
534 | ||
535 | void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info, | |
536 | struct page *page, u64 start, u32 len) | |
537 | { | |
538 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
72a69cd0 QW |
539 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, |
540 | writeback, start, len); | |
3470da3b QW |
541 | unsigned long flags; |
542 | ||
543 | spin_lock_irqsave(&subpage->lock, flags); | |
72a69cd0 QW |
544 | bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); |
545 | if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) { | |
7c11d0ae | 546 | ASSERT(PageWriteback(page)); |
3470da3b | 547 | end_page_writeback(page); |
7c11d0ae | 548 | } |
3470da3b QW |
549 | spin_unlock_irqrestore(&subpage->lock, flags); |
550 | } | |
551 | ||
6f17400b QW |
552 | void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info, |
553 | struct page *page, u64 start, u32 len) | |
554 | { | |
555 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
72a69cd0 QW |
556 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, |
557 | ordered, start, len); | |
6f17400b QW |
558 | unsigned long flags; |
559 | ||
560 | spin_lock_irqsave(&subpage->lock, flags); | |
72a69cd0 | 561 | bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); |
6f17400b QW |
562 | SetPageOrdered(page); |
563 | spin_unlock_irqrestore(&subpage->lock, flags); | |
564 | } | |
565 | ||
566 | void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info, | |
567 | struct page *page, u64 start, u32 len) | |
568 | { | |
569 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
72a69cd0 QW |
570 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, |
571 | ordered, start, len); | |
6f17400b QW |
572 | unsigned long flags; |
573 | ||
574 | spin_lock_irqsave(&subpage->lock, flags); | |
72a69cd0 QW |
575 | bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); |
576 | if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered)) | |
6f17400b QW |
577 | ClearPageOrdered(page); |
578 | spin_unlock_irqrestore(&subpage->lock, flags); | |
579 | } | |
e4f94347 QW |
580 | |
581 | void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info, | |
582 | struct page *page, u64 start, u32 len) | |
583 | { | |
584 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
585 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, | |
586 | checked, start, len); | |
587 | unsigned long flags; | |
588 | ||
589 | spin_lock_irqsave(&subpage->lock, flags); | |
590 | bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); | |
591 | if (subpage_test_bitmap_all_set(fs_info, subpage, checked)) | |
592 | SetPageChecked(page); | |
593 | spin_unlock_irqrestore(&subpage->lock, flags); | |
594 | } | |
595 | ||
596 | void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info, | |
597 | struct page *page, u64 start, u32 len) | |
598 | { | |
599 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
600 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, | |
601 | checked, start, len); | |
602 | unsigned long flags; | |
603 | ||
604 | spin_lock_irqsave(&subpage->lock, flags); | |
605 | bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); | |
606 | ClearPageChecked(page); | |
607 | spin_unlock_irqrestore(&subpage->lock, flags); | |
608 | } | |
609 | ||
a1d767c1 QW |
610 | /* |
611 | * Unlike set/clear, which depends on each page status, for the test helpers | 
612 | * all bits are tested in the same way. | 
613 | */ | |
614 | #define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name) \ | |
615 | bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \ | |
616 | struct page *page, u64 start, u32 len) \ | |
617 | { \ | |
618 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; \ | |
72a69cd0 QW |
619 | unsigned int start_bit = subpage_calc_start_bit(fs_info, page, \ |
620 | name, start, len); \ | |
a1d767c1 QW |
621 | unsigned long flags; \ |
622 | bool ret; \ | |
623 | \ | |
624 | spin_lock_irqsave(&subpage->lock, flags); \ | |
72a69cd0 QW |
625 | ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit, \ |
626 | len >> fs_info->sectorsize_bits); \ | |
a1d767c1 QW |
627 | spin_unlock_irqrestore(&subpage->lock, flags); \ |
628 | return ret; \ | |
629 | } | |
630 | IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate); | |
03a816b3 | 631 | IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(error); |
d8a5713e | 632 | IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty); |
3470da3b | 633 | IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback); |
6f17400b | 634 | IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered); |
e4f94347 | 635 | IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked); |
a1d767c1 QW |
636 | |
637 | /* | |
638 | * Note that in selftests (extent-io-tests), we can have a NULL fs_info passed | 
639 | * in. We only test sectorsize == PAGE_SIZE cases so far, thus we can fall | 
640 | * back to the regular sectorsize branch. | 
641 | */ | |
642 | #define IMPLEMENT_BTRFS_PAGE_OPS(name, set_page_func, clear_page_func, \ | |
643 | test_page_func) \ | |
644 | void btrfs_page_set_##name(const struct btrfs_fs_info *fs_info, \ | |
645 | struct page *page, u64 start, u32 len) \ | |
646 | { \ | |
fbca46eb | 647 | if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) { \ |
a1d767c1 QW |
648 | set_page_func(page); \ |
649 | return; \ | |
650 | } \ | |
651 | btrfs_subpage_set_##name(fs_info, page, start, len); \ | |
652 | } \ | |
653 | void btrfs_page_clear_##name(const struct btrfs_fs_info *fs_info, \ | |
654 | struct page *page, u64 start, u32 len) \ | |
655 | { \ | |
fbca46eb | 656 | if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) { \ |
a1d767c1 QW |
657 | clear_page_func(page); \ |
658 | return; \ | |
659 | } \ | |
660 | btrfs_subpage_clear_##name(fs_info, page, start, len); \ | |
661 | } \ | |
662 | bool btrfs_page_test_##name(const struct btrfs_fs_info *fs_info, \ | |
663 | struct page *page, u64 start, u32 len) \ | |
664 | { \ | |
fbca46eb | 665 | if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) \ |
a1d767c1 QW |
666 | return test_page_func(page); \ |
667 | return btrfs_subpage_test_##name(fs_info, page, start, len); \ | |
60e2d255 QW |
668 | } \ |
669 | void btrfs_page_clamp_set_##name(const struct btrfs_fs_info *fs_info, \ | |
670 | struct page *page, u64 start, u32 len) \ | |
671 | { \ | |
fbca46eb | 672 | if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) { \ |
60e2d255 QW |
673 | set_page_func(page); \ |
674 | return; \ | |
675 | } \ | |
676 | btrfs_subpage_clamp_range(page, &start, &len); \ | |
677 | btrfs_subpage_set_##name(fs_info, page, start, len); \ | |
678 | } \ | |
679 | void btrfs_page_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \ | |
680 | struct page *page, u64 start, u32 len) \ | |
681 | { \ | |
fbca46eb | 682 | if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) { \ |
60e2d255 QW |
683 | clear_page_func(page); \ |
684 | return; \ | |
685 | } \ | |
686 | btrfs_subpage_clamp_range(page, &start, &len); \ | |
687 | btrfs_subpage_clear_##name(fs_info, page, start, len); \ | |
688 | } \ | |
689 | bool btrfs_page_clamp_test_##name(const struct btrfs_fs_info *fs_info, \ | |
690 | struct page *page, u64 start, u32 len) \ | |
691 | { \ | |
fbca46eb | 692 | if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) \ |
60e2d255 QW |
693 | return test_page_func(page); \ |
694 | btrfs_subpage_clamp_range(page, &start, &len); \ | |
695 | return btrfs_subpage_test_##name(fs_info, page, start, len); \ | |
a1d767c1 QW |
696 | } |
697 | IMPLEMENT_BTRFS_PAGE_OPS(uptodate, SetPageUptodate, ClearPageUptodate, | |
698 | PageUptodate); | |
03a816b3 | 699 | IMPLEMENT_BTRFS_PAGE_OPS(error, SetPageError, ClearPageError, PageError); |
d8a5713e QW |
700 | IMPLEMENT_BTRFS_PAGE_OPS(dirty, set_page_dirty, clear_page_dirty_for_io, |
701 | PageDirty); | |
3470da3b QW |
702 | IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback, |
703 | PageWriteback); | |
6f17400b QW |
704 | IMPLEMENT_BTRFS_PAGE_OPS(ordered, SetPageOrdered, ClearPageOrdered, |
705 | PageOrdered); | |
e4f94347 | 706 | IMPLEMENT_BTRFS_PAGE_OPS(checked, SetPageChecked, ClearPageChecked, PageChecked); |
cc1d0d93 QW |
707 | |
708 | /* | |
709 | * Make sure that not only the page dirty bit is cleared, but also the | 
710 | * subpage dirty bit is cleared. | 
711 | */ | |
712 | void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info, | |
713 | struct page *page) | |
714 | { | |
715 | struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; | |
716 | ||
717 | if (!IS_ENABLED(CONFIG_BTRFS_ASSERT)) | |
718 | return; | |
719 | ||
720 | ASSERT(!PageDirty(page)); | |
fbca46eb | 721 | if (!btrfs_is_subpage(fs_info, page)) |
cc1d0d93 QW |
722 | return; |
723 | ||
724 | ASSERT(PagePrivate(page) && page->private); | |
72a69cd0 | 725 | ASSERT(subpage_test_bitmap_all_zero(fs_info, subpage, dirty)); |
cc1d0d93 | 726 | } |
e55a0de1 QW |
727 | |
728 | /* | |
729 | * Handle different locked pages with different page sizes: | |
730 | * | |
731 | * - Page locked by plain lock_page() | |
732 | * It should not have any subpage::writers count. | |
733 | * Can be unlocked by unlock_page(). | |
734 | * This is the most common locked page for __extent_writepage() called | |
f3e90c1c | 735 | * inside extent_write_cache_pages(). |
e55a0de1 QW |
736 | * Rarer cases include the @locked_page from extent_write_locked_range(). |
737 | * | |
738 | * - Page locked by lock_delalloc_pages() | |
739 | * There is only one caller; it locks all pages except @locked_page for | 
740 | * extent_write_locked_range(). | 
741 | * In this case, we have to call the subpage helper to handle it. | 
742 | */ | |
743 | void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page, | |
744 | u64 start, u32 len) | |
745 | { | |
746 | struct btrfs_subpage *subpage; | |
747 | ||
748 | ASSERT(PageLocked(page)); | |
fbca46eb QW |
749 | /* For non-subpage case, we just unlock the page */ |
750 | if (!btrfs_is_subpage(fs_info, page)) | |
e55a0de1 QW |
751 | return unlock_page(page); |
752 | ||
753 | ASSERT(PagePrivate(page) && page->private); | |
754 | subpage = (struct btrfs_subpage *)page->private; | |
755 | ||
756 | /* | |
757 | * For the subpage case, there are two types of locked pages: with or | 
758 | * without a writers count. | 
759 | * | |
760 | * Since we own the page lock, no one else could touch subpage::writers | |
761 | * and we are safe to do several atomic operations without spinlock. | |
762 | */ | |
c992fa1f | 763 | if (atomic_read(&subpage->writers) == 0) |
e55a0de1 QW |
764 | /* No writers, locked by plain lock_page() */ |
765 | return unlock_page(page); | |
766 | ||
767 | /* Have writers, use proper subpage helper to end it */ | |
768 | btrfs_page_end_writer_lock(fs_info, page, start, len); | |
769 | } |