mm: page_isolation: move has_unmovable_pages() to mm/page_isolation.c
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/page_isolation.c
 */

#include <linux/mm.h>
#include <linux/page-isolation.h>
#include <linux/pageblock-flags.h>
#include <linux/memory.h>
#include <linux/hugetlb.h>
#include <linux/page_owner.h>
#include <linux/migrate.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/page_isolation.h>

/*
 * This function checks whether the pageblock includes unmovable pages or not.
 *
 * A PageLRU check without isolation or the lru_lock can race, so a
 * MIGRATE_MOVABLE block might include unmovable pages. Likewise, a
 * __PageMovable() check without lock_page() may miss some movable non-LRU
 * pages under racy conditions. So this function is not expected to be exact.
 *
 * Returns a page without holding a reference. If the caller wants to
 * dereference that page (e.g., dumping), it has to make sure that it
 * cannot get removed (e.g., via memory unplug) concurrently.
 *
 */
static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
					int migratetype, int flags)
{
	unsigned long iter = 0;
	unsigned long pfn = page_to_pfn(page);
	unsigned long offset = pfn % pageblock_nr_pages;

	if (is_migrate_cma_page(page)) {
		/*
		 * CMA allocations (alloc_contig_range) really need to mark
		 * their CMA pageblocks as isolated even when the pages are
		 * not in fact movable, so consider them movable here.
		 */
		if (is_migrate_cma(migratetype))
			return NULL;

		return page;
	}

	for (; iter < pageblock_nr_pages - offset; iter++) {
		page = pfn_to_page(pfn + iter);

		/*
		 * Both bootmem allocations and memory holes are marked
		 * PG_reserved and are unmovable. We can even have unmovable
		 * allocations inside ZONE_MOVABLE, for example when
		 * specifying "movablecore".
		 */
		if (PageReserved(page))
			return page;

		/*
		 * If the zone is movable and we have ruled out all reserved
		 * pages then it should be reasonably safe to assume the rest
		 * is movable.
		 */
		if (zone_idx(zone) == ZONE_MOVABLE)
			continue;

		/*
		 * Hugepages are not in LRU lists, but they're movable.
		 * THPs are on the LRU, but need to be counted as #small pages.
		 * We need not scan over tail pages because we don't
		 * handle each tail page individually in migration.
		 */
		if (PageHuge(page) || PageTransCompound(page)) {
			struct page *head = compound_head(page);
			unsigned int skip_pages;

			if (PageHuge(page)) {
				if (!hugepage_migration_supported(page_hstate(head)))
					return page;
			} else if (!PageLRU(head) && !__PageMovable(head)) {
				return page;
			}

			skip_pages = compound_nr(head) - (page - head);
			iter += skip_pages - 1;
			continue;
		}

		/*
		 * We can't use page_count() without pinning the page
		 * because another CPU can free the compound page.
		 * This check already skips compound tails of THP
		 * because their page->_refcount is zero at all times.
		 */
		if (!page_ref_count(page)) {
			if (PageBuddy(page))
				iter += (1 << buddy_order(page)) - 1;
			continue;
		}

		/*
		 * A HWPoisoned page may not be in the buddy system, and
		 * its page_count() is not 0.
		 */
		if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
			continue;

		/*
		 * We treat all PageOffline() pages as movable when offlining
		 * to give drivers a chance to decrement their reference count
		 * in MEM_GOING_OFFLINE in order to indicate that these pages
		 * can be offlined as there are no direct references anymore.
		 * For actually unmovable PageOffline() pages where the driver
		 * does not support this, we will fail later when trying to
		 * actually move these pages that still have a reference
		 * count > 0. (false negatives in this function only)
		 */
		if ((flags & MEMORY_OFFLINE) && PageOffline(page))
			continue;

		if (__PageMovable(page) || PageLRU(page))
			continue;

		/*
		 * If there are RECLAIMABLE pages, we need to check them.
		 * But for now, memory offlining itself doesn't call
		 * shrink_node_slabs() and this remains to be fixed.
		 */
		return page;
	}
	return NULL;
}

static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags)
{
	struct zone *zone = page_zone(page);
	struct page *unmovable;
	unsigned long flags;

	spin_lock_irqsave(&zone->lock, flags);

	/*
	 * We assume the caller intended to SET the migrate type to isolate.
	 * If it is already set, then someone else must have raced and
	 * set it before us.
	 */
	if (is_migrate_isolate_page(page)) {
		spin_unlock_irqrestore(&zone->lock, flags);
		return -EBUSY;
	}

	/*
	 * FIXME: memory hotplug doesn't call shrink_slab() by itself for now.
	 * We just check MOVABLE pages.
	 */
	unmovable = has_unmovable_pages(zone, page, migratetype, isol_flags);
	if (!unmovable) {
		unsigned long nr_pages;
		int mt = get_pageblock_migratetype(page);

		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
		zone->nr_isolate_pageblock++;
		nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE,
						NULL);

		__mod_zone_freepage_state(zone, -nr_pages, mt);
		spin_unlock_irqrestore(&zone->lock, flags);
		return 0;
	}

	spin_unlock_irqrestore(&zone->lock, flags);
	if (isol_flags & REPORT_FAILURE) {
		/*
		 * printk() with zone->lock held will likely trigger a
		 * lockdep splat, so defer it here.
		 */
		dump_page(unmovable, "unmovable page");
	}

	return -EBUSY;
}

static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
{
	struct zone *zone;
	unsigned long flags, nr_pages;
	bool isolated_page = false;
	unsigned int order;
	struct page *buddy;

	zone = page_zone(page);
	spin_lock_irqsave(&zone->lock, flags);
	if (!is_migrate_isolate_page(page))
		goto out;

	/*
	 * Because a free page with order greater than pageblock_order on an
	 * isolated pageblock is restricted from merging due to the freepage
	 * counting problem, it is possible that there is a free buddy page.
	 * move_freepages_block() doesn't care about merging, so we need
	 * another approach in order to merge them. Isolating and then
	 * freeing the page will make these pages be merged.
	 */
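	/*
	 * For example, on a typical configuration with pageblock_order == 9
	 * and MAX_ORDER - 1 == 10 (illustrative values, not universal), the
	 * only order handled below is 9: a fully free, isolated pageblock.
	 * Pulling that page off the freelist here and freeing it again after
	 * the isolation is cleared lets it merge with a free, non-isolated
	 * buddy into an order-10 page.
	 */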
	if (PageBuddy(page)) {
		order = buddy_order(page);
		if (order >= pageblock_order && order < MAX_ORDER - 1) {
			buddy = find_buddy_page_pfn(page, page_to_pfn(page),
						    order, NULL);
			if (buddy && !is_migrate_isolate_page(buddy)) {
				isolated_page = !!__isolate_free_page(page, order);
				/*
				 * Isolating a free page in an isolated
				 * pageblock is expected to always work as
				 * watermarks don't apply here.
				 */
				VM_WARN_ON(!isolated_page);
			}
		}
	}

	/*
	 * If we isolated a free page with order greater than pageblock_order,
	 * there should be no free pages left in the range, so we can avoid
	 * the costly pageblock scan for freepage moving.
	 *
	 * We didn't actually touch any of the isolated pages, so place them
	 * at the tail of the freelist. This is an optimization for memory
	 * onlining - just onlined memory won't immediately be considered for
	 * allocation.
	 */
	if (!isolated_page) {
		nr_pages = move_freepages_block(zone, page, migratetype, NULL);
		__mod_zone_freepage_state(zone, nr_pages, migratetype);
	}
	set_pageblock_migratetype(page, migratetype);
	if (isolated_page)
		__putback_isolated_page(page, order, migratetype);
	zone->nr_isolate_pageblock--;
out:
	spin_unlock_irqrestore(&zone->lock, flags);
}

static inline struct page *
__first_valid_page(unsigned long pfn, unsigned long nr_pages)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
		struct page *page;

		page = pfn_to_online_page(pfn + i);
		if (!page)
			continue;
		return page;
	}
	return NULL;
}

/**
 * start_isolate_page_range() - make the page-allocation type of a range of
 *				pages MIGRATE_ISOLATE.
 * @start_pfn:		The lower PFN of the range to be isolated.
 * @end_pfn:		The upper PFN of the range to be isolated.
 *			start_pfn/end_pfn must be aligned to pageblock_order.
 * @migratetype:	Migrate type to set in error recovery.
 * @flags:		The following flags are allowed (they can be combined
 *			in a bit mask):
 *			MEMORY_OFFLINE - isolate to offline (!allocate) memory
 *					 e.g., skip over PageHWPoison() pages
 *					 and PageOffline() pages.
 *			REPORT_FAILURE - report details about the failure to
 *					 isolate the range
 *
 * Making the page-allocation type MIGRATE_ISOLATE means free pages in
 * the range will never be allocated. Any free pages and pages freed in the
 * future will not be allocated again. If the specified range includes migrate
 * types other than MOVABLE or CMA, this will fail with -EBUSY. To finally
 * isolate all pages in the range, the caller has to free all pages in the
 * range. test_pages_isolated() can be used to test it.
 *
 * There is no high level synchronization mechanism that prevents two threads
 * from trying to isolate overlapping ranges. If this happens, one thread
 * will notice pageblocks in the overlapping range already set to isolate.
 * This happens in set_migratetype_isolate, and set_migratetype_isolate
 * returns an error. We then clean up by restoring the migration type on
 * pageblocks we may have modified and return -EBUSY to the caller. This
 * prevents two threads from simultaneously working on overlapping ranges.
 *
 * Please note that there is no strong synchronization with the page allocator
 * either. Pages might be freed while their page blocks are marked ISOLATED.
 * A call to drain_all_pages() after isolation can flush most of them. However
 * in some cases pages might still end up on pcp lists and that would allow
 * for their allocation even when they are in fact isolated already. Depending
 * on how strong of a guarantee the caller needs, zone_pcp_disable/enable()
 * might be used to flush and disable the pcplists before isolation and enable
 * them again after unisolation.
 *
 * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
 */
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			     unsigned migratetype, int flags)
{
	unsigned long pfn;
	struct page *page;

	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));

	for (pfn = start_pfn;
	     pfn < end_pfn;
	     pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page && set_migratetype_isolate(page, migratetype, flags)) {
			undo_isolate_page_range(start_pfn, pfn, migratetype);
			return -EBUSY;
		}
	}
	return 0;
}

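/*
 * Illustrative sketch only (not part of the original file): a minimal,
 * hedged example of how callers such as alloc_contig_range() or memory
 * offlining typically combine the entry points in this file. The function
 * name is hypothetical and the migration step is elided; only
 * start_isolate_page_range(), drain_all_pages(), test_pages_isolated() and
 * undo_isolate_page_range() are real interfaces.
 */
#if 0	/* example only, never compiled */
static int example_isolate_range(unsigned long start_pfn, unsigned long end_pfn)
{
	struct zone *zone = page_zone(pfn_to_page(start_pfn));
	int ret;

	/* Mark every pageblock in the range MIGRATE_ISOLATE. */
	ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE, 0);
	if (ret)
		return ret;

	/* Flush per-cpu lists so stray free pages reach the buddy freelists. */
	drain_all_pages(zone);

	/* ...the caller would migrate any remaining in-use pages here... */

	/* Succeeds (0) only if the whole range is free and isolated. */
	ret = test_pages_isolated(start_pfn, end_pfn, 0);

	/* Make the range available for allocation again. */
	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
	return ret;
}
#endif
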
/*
 * Make isolated pages available again.
 */
void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			     unsigned migratetype)
{
	unsigned long pfn;
	struct page *page;

	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));

	for (pfn = start_pfn;
	     pfn < end_pfn;
	     pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (!page || !is_migrate_isolate_page(page))
			continue;
		unset_migratetype_isolate(page, migratetype);
	}
}
/*
 * Test whether all pages in the range are free (i.e., isolated) or not.
 * All pages in [start_pfn...end_pfn) must be in the same zone.
 * zone->lock must be held before calling this.
 *
 * Returns the last tested pfn.
 */
static unsigned long
__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
				  int flags)
{
	struct page *page;

	while (pfn < end_pfn) {
		page = pfn_to_page(pfn);
		if (PageBuddy(page))
			/*
			 * If the page is on a free list, it has to be on
			 * the correct MIGRATE_ISOLATE freelist. There is no
			 * simple way to verify that as VM_BUG_ON(), though.
			 */
			pfn += 1 << buddy_order(page);
		else if ((flags & MEMORY_OFFLINE) && PageHWPoison(page))
			/* A HWPoisoned page cannot also be PageBuddy */
			pfn++;
		else if ((flags & MEMORY_OFFLINE) && PageOffline(page) &&
			 !page_count(page))
			/*
			 * The responsible driver agreed to skip PageOffline()
			 * pages when offlining memory by dropping its
			 * reference in MEM_GOING_OFFLINE.
			 */
			pfn++;
		else
			break;
	}

	return pfn;
}

/* Caller should ensure that the requested range is in a single zone */
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
			int isol_flags)
{
	unsigned long pfn, flags;
	struct page *page;
	struct zone *zone;
	int ret;

	/*
	 * Note: pageblock_nr_pages != MAX_ORDER, so chunks of free pages
	 * are not necessarily aligned to pageblock_nr_pages.
	 * We therefore just check the migratetype first.
	 */
	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page && !is_migrate_isolate_page(page))
			break;
	}
	page = __first_valid_page(start_pfn, end_pfn - start_pfn);
	if ((pfn < end_pfn) || !page) {
		ret = -EBUSY;
		goto out;
	}

	/* Check all pages are free or marked as ISOLATED */
	zone = page_zone(page);
	spin_lock_irqsave(&zone->lock, flags);
	pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, isol_flags);
	spin_unlock_irqrestore(&zone->lock, flags);

	ret = pfn < end_pfn ? -EBUSY : 0;

out:
	trace_test_pages_isolated(start_pfn, end_pfn, pfn);

	return ret;
}