Commit | Line | Data |
---|---|---|
cac06d84 QW |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include <linux/slab.h> | |
4 | #include "ctree.h" | |
5 | #include "subpage.h" | |
3d078efa | 6 | #include "btrfs_inode.h" |
cac06d84 | 7 | |
894d1378 QW |
8 | /* |
9 | * Subpage (sectorsize < PAGE_SIZE) support overview: | |
10 | * | |
11 | * Limitations: | |
12 | * | |
13 | * - Only support 64K page size for now | |
14 | * This is to make metadata handling easier, as 64K page would ensure | |
15 | * all nodesize would fit inside one page, thus we don't need to handle | |
16 | * cases where a tree block crosses several pages. | |
17 | * | |
18 | * - Only metadata read-write for now | |
19 | * The data read-write part is in development. | |
20 | * | |
21 | * - Metadata can't cross 64K page boundary | |
22 | * btrfs-progs and kernel have done that for a while, thus only ancient | |
23 | * filesystems could have such problem. For such case, do a graceful | |
24 | * rejection. | |
25 | * | |
26 | * Special behavior: | |
27 | * | |
28 | * - Metadata | |
29 | * Metadata read is fully supported. | |
 * Meaning that reading one tree block will only trigger the read for the
 * needed range, while other unrelated ranges in the same page will not be touched.
32 | * | |
33 | * Metadata write support is partial. | |
34 | * The writeback is still for the full page, but we will only submit | |
35 | * the dirty extent buffers in the page. | |
36 | * | |
37 | * This means, if we have a metadata page like this: | |
38 | * | |
39 | * Page offset | |
40 | * 0 16K 32K 48K 64K | |
41 | * |/////////| |///////////| | |
42 | * \- Tree block A \- Tree block B | |
43 | * | |
44 | * Even if we just want to writeback tree block A, we will also writeback | |
45 | * tree block B if it's also dirty. | |
46 | * | |
 * This may cause extra metadata writeback which results in more COW.
48 | * | |
49 | * Implementation: | |
50 | * | |
51 | * - Common | |
52 | * Both metadata and data will use a new structure, btrfs_subpage, to | |
53 | * record the status of each sector inside a page. This provides the extra | |
54 | * granularity needed. | |
55 | * | |
56 | * - Metadata | |
57 | * Since we have multiple tree blocks inside one page, we can't rely on page | |
58 | * locking anymore, or we will have greatly reduced concurrency or even | |
59 | * deadlocks (hold one tree lock while trying to lock another tree lock in | |
60 | * the same page). | |
61 | * | |
62 | * Thus for metadata locking, subpage support relies on io_tree locking only. | |
63 | * This means a slightly higher tree locking latency. | |
64 | */ | |
65 | ||
fbca46eb QW |
66 | bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct page *page) |
67 | { | |
68 | if (fs_info->sectorsize >= PAGE_SIZE) | |
69 | return false; | |
70 | ||
71 | /* | |
72 | * Only data pages (either through DIO or compression) can have no | |
73 | * mapping. And if page->mapping->host is data inode, it's subpage. | |
74 | * As we have ruled our sectorsize >= PAGE_SIZE case already. | |
75 | */ | |
76 | if (!page->mapping || !page->mapping->host || | |
77 | is_data_inode(page->mapping->host)) | |
78 | return true; | |
79 | ||
80 | /* | |
81 | * Now the only remaining case is metadata, which we only go subpage | |
82 | * routine if nodesize < PAGE_SIZE. | |
83 | */ | |
84 | if (fs_info->nodesize < PAGE_SIZE) | |
85 | return true; | |
86 | return false; | |
87 | } | |
88 | ||
8481dd80 QW |
89 | void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize) |
90 | { | |
91 | unsigned int cur = 0; | |
92 | unsigned int nr_bits; | |
93 | ||
94 | ASSERT(IS_ALIGNED(PAGE_SIZE, sectorsize)); | |
95 | ||
96 | nr_bits = PAGE_SIZE / sectorsize; | |
97 | subpage_info->bitmap_nr_bits = nr_bits; | |
98 | ||
99 | subpage_info->uptodate_offset = cur; | |
100 | cur += nr_bits; | |
101 | ||
102 | subpage_info->error_offset = cur; | |
103 | cur += nr_bits; | |
104 | ||
105 | subpage_info->dirty_offset = cur; | |
106 | cur += nr_bits; | |
107 | ||
108 | subpage_info->writeback_offset = cur; | |
109 | cur += nr_bits; | |
110 | ||
111 | subpage_info->ordered_offset = cur; | |
112 | cur += nr_bits; | |
113 | ||
e4f94347 QW |
114 | subpage_info->checked_offset = cur; |
115 | cur += nr_bits; | |
116 | ||
8481dd80 QW |
117 | subpage_info->total_nr_bits = cur; |
118 | } | |
119 | ||
/*
 * Attach a newly allocated btrfs_subpage structure to @page as page private.
 *
 * A no-op (returning 0) when the page is not subpage, or when a private
 * structure is already attached.  Returns a negative errno if the subpage
 * allocation fails.
 */
int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
			 struct page *page, enum btrfs_subpage_type type)
{
	struct btrfs_subpage *subpage;

	/*
	 * We have cases like a dummy extent buffer page, which is not mapped
	 * and doesn't need to be locked.
	 */
	if (page->mapping)
		ASSERT(PageLocked(page));

	/* Either not subpage, or the page already has private attached */
	if (!btrfs_is_subpage(fs_info, page) || PagePrivate(page))
		return 0;

	subpage = btrfs_alloc_subpage(fs_info, type);
	if (IS_ERR(subpage))
		return PTR_ERR(subpage);

	attach_page_private(page, subpage);
	return 0;
}
143 | ||
/*
 * Detach and free the btrfs_subpage structure attached to @page.
 *
 * A no-op when the page is not subpage or has no private attached.
 */
void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
			  struct page *page)
{
	struct btrfs_subpage *subpage;

	/* Either not subpage, or already detached */
	if (!btrfs_is_subpage(fs_info, page) || !PagePrivate(page))
		return;

	subpage = detach_page_private(page);
	ASSERT(subpage);
	btrfs_free_subpage(subpage);
}
157 | ||
/*
 * Allocate a btrfs_subpage structure, with its trailing bitmap sized for
 * fs_info->subpage_info->total_nr_bits bits.
 *
 * Returns the new structure, or an ERR_PTR(-ENOMEM) on allocation failure.
 */
struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
					  enum btrfs_subpage_type type)
{
	struct btrfs_subpage *ret;
	unsigned int real_size;

	/* Callers must only allocate subpage structures for subpage cases. */
	ASSERT(fs_info->sectorsize < PAGE_SIZE);

	real_size = struct_size(ret, bitmaps,
			BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits));
	ret = kzalloc(real_size, GFP_NOFS);
	if (!ret)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&ret->lock);
	if (type == BTRFS_SUBPAGE_METADATA) {
		/* Metadata pages track extent buffer references. */
		atomic_set(&ret->eb_refs, 0);
	} else {
		/* Data pages track reader/writer counts instead. */
		atomic_set(&ret->readers, 0);
		atomic_set(&ret->writers, 0);
	}
	return ret;
}
181 | ||
/* Free a btrfs_subpage structure allocated by btrfs_alloc_subpage(). */
void btrfs_free_subpage(struct btrfs_subpage *subpage)
{
	kfree(subpage);
}
/*
 * Increase the eb_refs of current subpage.
 *
 * This is important for eb allocation, to prevent race with last eb freeing
 * of the same page.
 * With the eb_refs increased before the eb inserted into radix tree,
 * detach_extent_buffer_page() won't detach the page private while we're still
 * allocating the extent buffer.
 *
 * A no-op for the non-subpage case.  The caller must hold
 * page->mapping->private_lock.
 */
void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info,
			    struct page *page)
{
	struct btrfs_subpage *subpage;

	if (!btrfs_is_subpage(fs_info, page))
		return;

	ASSERT(PagePrivate(page) && page->mapping);
	lockdep_assert_held(&page->mapping->private_lock);

	subpage = (struct btrfs_subpage *)page->private;
	atomic_inc(&subpage->eb_refs);
}
210 | ||
/*
 * Decrease the eb_refs of current subpage, the counterpart of
 * btrfs_page_inc_eb_refs().
 *
 * A no-op for the non-subpage case.  The caller must hold
 * page->mapping->private_lock, and eb_refs must not underflow.
 */
void btrfs_page_dec_eb_refs(const struct btrfs_fs_info *fs_info,
			    struct page *page)
{
	struct btrfs_subpage *subpage;

	if (!btrfs_is_subpage(fs_info, page))
		return;

	ASSERT(PagePrivate(page) && page->mapping);
	lockdep_assert_held(&page->mapping->private_lock);

	subpage = (struct btrfs_subpage *)page->private;
	/* Catch underflow: there must be a reference to drop. */
	ASSERT(atomic_read(&subpage->eb_refs));
	atomic_dec(&subpage->eb_refs);
}
a1d767c1 | 226 | |
/*
 * Common sanity checks for subpage range helpers: the page must have a
 * private structure attached, and [start, start + len) must be sector
 * aligned and (for mapped pages) fully inside the page.
 */
static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
				 struct page *page, u64 start, u32 len)
{
	/* Basic checks */
	ASSERT(PagePrivate(page) && page->private);
	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
	       IS_ALIGNED(len, fs_info->sectorsize));
	/*
	 * The range check only works for mapped page, we can still have
	 * unmapped page like dummy extent buffer pages.
	 */
	if (page->mapping)
		ASSERT(page_offset(page) <= start &&
		       start + len <= page_offset(page) + PAGE_SIZE);
}
242 | ||
/*
 * Account the start of a read on the range by adding one reader count
 * per sector.
 */
void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
				struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	const int nbits = len >> fs_info->sectorsize_bits;

	btrfs_subpage_assert(fs_info, page, start, len);

	atomic_add(nbits, &subpage->readers);
}
253 | ||
/*
 * Account the end of a read on the range; for data pages the page lock
 * is released once the very last reader of the page has finished.
 */
void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
			      struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	const int nbits = len >> fs_info->sectorsize_bits;
	bool is_data;
	bool last;

	btrfs_subpage_assert(fs_info, page, start, len);
	is_data = is_data_inode(page->mapping->host);
	/* Catch underflow of the reader count. */
	ASSERT(atomic_read(&subpage->readers) >= nbits);
	last = atomic_sub_and_test(nbits, &subpage->readers);

	/*
	 * For data we need to unlock the page if the last read has finished.
	 *
	 * And please don't replace @last with atomic_sub_and_test() call
	 * inside if () condition.
	 * As we want the atomic_sub_and_test() to be always executed.
	 */
	if (is_data && last)
		unlock_page(page);
}
277 | ||
1e1de387 QW |
278 | static void btrfs_subpage_clamp_range(struct page *page, u64 *start, u32 *len) |
279 | { | |
280 | u64 orig_start = *start; | |
281 | u32 orig_len = *len; | |
282 | ||
283 | *start = max_t(u64, page_offset(page), orig_start); | |
e4f94347 QW |
284 | /* |
285 | * For certain call sites like btrfs_drop_pages(), we may have pages | |
286 | * beyond the target range. In that case, just set @len to 0, subpage | |
287 | * helpers can handle @len == 0 without any problem. | |
288 | */ | |
289 | if (page_offset(page) >= orig_start + orig_len) | |
290 | *len = 0; | |
291 | else | |
292 | *len = min_t(u64, page_offset(page) + PAGE_SIZE, | |
293 | orig_start + orig_len) - *start; | |
1e1de387 QW |
294 | } |
295 | ||
/*
 * Account the start of a write on the range by adding one writer count
 * per sector.  Must not be called while readers are active.
 */
void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
				struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	const int nbits = (len >> fs_info->sectorsize_bits);
	int ret;

	btrfs_subpage_assert(fs_info, page, start, len);

	ASSERT(atomic_read(&subpage->readers) == 0);
	ret = atomic_add_return(nbits, &subpage->writers);
	/* The writer count must have been zero before this call. */
	ASSERT(ret == nbits);
}
309 | ||
/*
 * Account the end of a write on the range.
 *
 * Return true if the caller is the last writer of the page (and should
 * therefore unlock it), false otherwise.
 */
bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
				       struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	const int nbits = (len >> fs_info->sectorsize_bits);

	btrfs_subpage_assert(fs_info, page, start, len);

	/*
	 * We have call sites passing @locked_page into
	 * extent_clear_unlock_delalloc() for compression path.
	 *
	 * This @locked_page is locked by plain lock_page(), thus its
	 * subpage::writers is 0. Handle them in a special way.
	 */
	if (atomic_read(&subpage->writers) == 0)
		return true;

	/* Catch underflow of the writer count. */
	ASSERT(atomic_read(&subpage->writers) >= nbits);
	return atomic_sub_and_test(nbits, &subpage->writers);
}
331 | ||
/*
 * Lock a page for delalloc page writeback.
 *
 * Return -EAGAIN if the page is not properly initialized.
 * Return 0 with the page locked, and writer counter updated.
 *
 * Even with 0 returned, the page still need extra check to make sure
 * it's really the correct page, as the caller is using
 * filemap_get_folios_contig(), which can race with page invalidating.
 */
int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info,
				 struct page *page, u64 start, u32 len)
{
	/* Selftests may pass a NULL fs_info; fall back to plain page locking. */
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {
		lock_page(page);
		return 0;
	}
	lock_page(page);
	/* The private structure may have been detached by invalidation. */
	if (!PagePrivate(page) || !page->private) {
		unlock_page(page);
		return -EAGAIN;
	}
	btrfs_subpage_clamp_range(page, &start, &len);
	btrfs_subpage_start_writer(fs_info, page, start, len);
	return 0;
}
358 | ||
/*
 * Unlock a page locked via btrfs_page_start_writer_lock().
 *
 * For the subpage case the page is only unlocked once the last writer
 * of the page is gone.
 */
void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
				struct page *page, u64 start, u32 len)
{
	/* Selftests may pass a NULL fs_info; fall back to plain unlock. */
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page))
		return unlock_page(page);
	btrfs_subpage_clamp_range(page, &start, &len);
	if (btrfs_subpage_end_and_test_writer(fs_info, page, start, len))
		unlock_page(page);
}
368 | ||
72a69cd0 QW |
369 | static bool bitmap_test_range_all_set(unsigned long *addr, unsigned int start, |
370 | unsigned int nbits) | |
92082d40 | 371 | { |
72a69cd0 | 372 | unsigned int found_zero; |
92082d40 | 373 | |
72a69cd0 QW |
374 | found_zero = find_next_zero_bit(addr, start + nbits, start); |
375 | if (found_zero == start + nbits) | |
376 | return true; | |
377 | return false; | |
378 | } | |
92082d40 | 379 | |
72a69cd0 QW |
380 | static bool bitmap_test_range_all_zero(unsigned long *addr, unsigned int start, |
381 | unsigned int nbits) | |
382 | { | |
383 | unsigned int found_set; | |
384 | ||
385 | found_set = find_next_bit(addr, start + nbits, start); | |
386 | if (found_set == start + nbits) | |
387 | return true; | |
388 | return false; | |
a1d767c1 QW |
389 | } |
390 | ||
/*
 * Convert the file offset range [start, start + len) inside the page into
 * the starting bit number of the @name bitmap inside subpage::bitmaps.
 */
#define subpage_calc_start_bit(fs_info, page, name, start, len)	\
({									\
	unsigned int start_bit;						\
									\
	btrfs_subpage_assert(fs_info, page, start, len);		\
	start_bit = offset_in_page(start) >> fs_info->sectorsize_bits;	\
	start_bit += fs_info->subpage_info->name##_offset;		\
	start_bit;							\
})

/* Return true if every @name bit of the whole page is set. */
#define subpage_test_bitmap_all_set(fs_info, subpage, name)		\
	bitmap_test_range_all_set(subpage->bitmaps,			\
			fs_info->subpage_info->name##_offset,		\
			fs_info->subpage_info->bitmap_nr_bits)

/* Return true if every @name bit of the whole page is clear. */
#define subpage_test_bitmap_all_zero(fs_info, subpage, name)		\
	bitmap_test_range_all_zero(subpage->bitmaps,			\
			fs_info->subpage_info->name##_offset,		\
			fs_info->subpage_info->bitmap_nr_bits)
410 | ||
/*
 * Set the uptodate bits for the range; the whole page is only marked
 * uptodate once every sector of it is uptodate.
 */
void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
				struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							uptodate, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate))
		SetPageUptodate(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * Clear the uptodate bits for the range; any not-uptodate sector makes
 * the whole page not uptodate, so the page flag is cleared unconditionally.
 */
void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
				  struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							uptodate, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	ClearPageUptodate(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}
439 | ||
/*
 * Set the error bits for the range; any errored sector marks the whole
 * page as having an error, so the page flag is set unconditionally.
 */
void btrfs_subpage_set_error(const struct btrfs_fs_info *fs_info,
			     struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							error, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	SetPageError(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * Clear the error bits for the range; the page error flag is only cleared
 * once no sector of the page has an error left.
 */
void btrfs_subpage_clear_error(const struct btrfs_fs_info *fs_info,
			       struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							error, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, error))
		ClearPageError(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}
468 | ||
/*
 * Set the dirty bits for the range and mark the page dirty; any dirty
 * sector makes the whole page dirty.
 */
void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
			     struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							dirty, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	spin_unlock_irqrestore(&subpage->lock, flags);
	set_page_dirty(page);
}

/*
 * Extra clear_and_test function for subpage dirty bitmap.
 *
 * Return true if we're the last bits in the dirty_bitmap and clear the
 * dirty_bitmap.
 * Return false otherwise.
 *
 * NOTE: Callers should manually clear page dirty for true case, as we have
 * extra handling for tree blocks.
 */
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
					struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							dirty, start, len);
	unsigned long flags;
	bool last = false;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty))
		last = true;
	spin_unlock_irqrestore(&subpage->lock, flags);
	return last;
}

/*
 * Clear the dirty bits for the range; the page dirty flag is only cleared
 * when the last dirty sector of the page is gone.
 */
void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info,
			       struct page *page, u64 start, u32 len)
{
	bool last;

	last = btrfs_subpage_clear_and_test_dirty(fs_info, page, start, len);
	if (last)
		clear_page_dirty_for_io(page);
}
519 | ||
/*
 * Set the writeback bits for the range and mark the page under writeback;
 * any sector under writeback makes the whole page under writeback.
 */
void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
				 struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							writeback, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	set_page_writeback(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * Clear the writeback bits for the range; page writeback is only ended
 * once no sector of the page is under writeback anymore.
 */
void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
				   struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							writeback, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) {
		/* The page must still be flagged before we end its writeback. */
		ASSERT(PageWriteback(page));
		end_page_writeback(page);
	}
	spin_unlock_irqrestore(&subpage->lock, flags);
}
550 | ||
/*
 * Set the ordered bits for the range and mark the page ordered; any
 * ordered sector makes the whole page ordered.
 */
void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
			       struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							ordered, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	SetPageOrdered(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * Clear the ordered bits for the range; the page ordered flag is only
 * cleared once no sector of the page is ordered anymore.
 */
void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
				 struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							ordered, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered))
		ClearPageOrdered(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}
e4f94347 QW |
579 | |
/*
 * Set the checked bits for the range; the whole page is only marked
 * checked once every sector of it is checked.
 */
void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
			       struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							checked, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_set(fs_info, subpage, checked))
		SetPageChecked(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * Clear the checked bits for the range; any unchecked sector makes the
 * whole page unchecked, so the page flag is cleared unconditionally.
 */
void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
				 struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
							checked, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	ClearPageChecked(page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}
608 | ||
/*
 * Unlike set/clear which is dependent on each page status, for test all bits
 * are tested in the same way.
 *
 * Each generated helper returns true only when every sector of the range
 * has the @name bit set, checked under subpage::lock.
 */
#define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name)				\
bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info,	\
			       struct page *page, u64 start, u32 len)	\
{									\
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; \
	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,	\
						name, start, len);	\
	unsigned long flags;						\
	bool ret;							\
									\
	spin_lock_irqsave(&subpage->lock, flags);			\
	ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit,	\
				len >> fs_info->sectorsize_bits);	\
	spin_unlock_irqrestore(&subpage->lock, flags);			\
	return ret;							\
}
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(error);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);
a1d767c1 QW |
635 | |
/*
 * Note that, in selftests (extent-io-tests), we can have empty fs_info passed
 * in. We only test sectorsize == PAGE_SIZE cases so far, thus we can fall
 * back to regular sectorsize branch.
 *
 * The generated btrfs_page_* helpers operate on the whole page for the
 * regular sectorsize case, and on the subpage bitmaps otherwise.  The
 * clamp variants additionally clamp the range to the page boundaries
 * before calling the subpage helper.
 */
#define IMPLEMENT_BTRFS_PAGE_OPS(name, set_page_func, clear_page_func,	\
				 test_page_func)			\
void btrfs_page_set_##name(const struct btrfs_fs_info *fs_info,		\
		struct page *page, u64 start, u32 len)			\
{									\
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {	\
		set_page_func(page);					\
		return;							\
	}								\
	btrfs_subpage_set_##name(fs_info, page, start, len);		\
}									\
void btrfs_page_clear_##name(const struct btrfs_fs_info *fs_info,	\
		struct page *page, u64 start, u32 len)			\
{									\
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {	\
		clear_page_func(page);					\
		return;							\
	}								\
	btrfs_subpage_clear_##name(fs_info, page, start, len);		\
}									\
bool btrfs_page_test_##name(const struct btrfs_fs_info *fs_info,	\
		struct page *page, u64 start, u32 len)			\
{									\
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page))	\
		return test_page_func(page);				\
	return btrfs_subpage_test_##name(fs_info, page, start, len);	\
}									\
void btrfs_page_clamp_set_##name(const struct btrfs_fs_info *fs_info,	\
		struct page *page, u64 start, u32 len)			\
{									\
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {	\
		set_page_func(page);					\
		return;							\
	}								\
	btrfs_subpage_clamp_range(page, &start, &len);			\
	btrfs_subpage_set_##name(fs_info, page, start, len);		\
}									\
void btrfs_page_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
		struct page *page, u64 start, u32 len)			\
{									\
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page)) {	\
		clear_page_func(page);					\
		return;							\
	}								\
	btrfs_subpage_clamp_range(page, &start, &len);			\
	btrfs_subpage_clear_##name(fs_info, page, start, len);		\
}									\
bool btrfs_page_clamp_test_##name(const struct btrfs_fs_info *fs_info,	\
		struct page *page, u64 start, u32 len)			\
{									\
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, page))	\
		return test_page_func(page);				\
	btrfs_subpage_clamp_range(page, &start, &len);			\
	return btrfs_subpage_test_##name(fs_info, page, start, len);	\
}
IMPLEMENT_BTRFS_PAGE_OPS(uptodate, SetPageUptodate, ClearPageUptodate,
			 PageUptodate);
IMPLEMENT_BTRFS_PAGE_OPS(error, SetPageError, ClearPageError, PageError);
IMPLEMENT_BTRFS_PAGE_OPS(dirty, set_page_dirty, clear_page_dirty_for_io,
			 PageDirty);
IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback,
			 PageWriteback);
IMPLEMENT_BTRFS_PAGE_OPS(ordered, SetPageOrdered, ClearPageOrdered,
			 PageOrdered);
IMPLEMENT_BTRFS_PAGE_OPS(checked, SetPageChecked, ClearPageChecked, PageChecked);

/*
 * Make sure not only the page dirty bit is cleared, but also subpage dirty bit
 * is cleared.
 *
 * Only effective when CONFIG_BTRFS_ASSERT is enabled.
 */
void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
				 struct page *page)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;

	if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
		return;

	ASSERT(!PageDirty(page));
	/* Non-subpage pages have no bitmap to verify. */
	if (!btrfs_is_subpage(fs_info, page))
		return;

	ASSERT(PagePrivate(page) && page->private);
	ASSERT(subpage_test_bitmap_all_zero(fs_info, subpage, dirty));
}
e55a0de1 QW |
726 | |
/*
 * Handle different locked pages with different page sizes:
 *
 * - Page locked by plain lock_page()
 *   It should not have any subpage::writers count.
 *   Can be unlocked by unlock_page().
 *   This is the most common locked page for __extent_writepage() called
 *   inside extent_write_cache_pages().
 *   Rarer cases include the @locked_page from extent_write_locked_range().
 *
 * - Page locked by lock_delalloc_pages()
 *   There is only one caller, all pages except @locked_page for
 *   extent_write_locked_range().
 *   In this case, we have to call subpage helper to handle the case.
 */
void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
			      u64 start, u32 len)
{
	struct btrfs_subpage *subpage;

	ASSERT(PageLocked(page));
	/* For non-subpage case, we just unlock the page */
	if (!btrfs_is_subpage(fs_info, page))
		return unlock_page(page);

	ASSERT(PagePrivate(page) && page->private);
	subpage = (struct btrfs_subpage *)page->private;

	/*
	 * For subpage case, there are two types of locked page.  With or
	 * without writers number.
	 *
	 * Since we own the page lock, no one else could touch subpage::writers
	 * and we are safe to do several atomic operations without spinlock.
	 */
	if (atomic_read(&subpage->writers) == 0)
		/* No writers, locked by plain lock_page() */
		return unlock_page(page);

	/* Have writers, use proper subpage helper to end it */
	btrfs_page_end_writer_lock(fs_info, page, start, len);
}