mm: make folios_put() the basis of release_pages()
mm/swap.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/mm/swap.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 */

/*
 * This file contains the default values for the operation of the
 * Linux VM subsystem. Fine-tuning documentation can be found in
 * Documentation/admin-guide/sysctl/vm.rst.
 * Started 18.12.91
 * Swap aging added 23.2.95, Stephen Tweedie.
 * Buffermem limits added 12.3.98, Rik van Riel.
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/mm_inline.h>
#include <linux/percpu_counter.h>
#include <linux/memremap.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/backing-dev.h>
#include <linux/memcontrol.h>
#include <linux/gfp.h>
#include <linux/uio.h>
#include <linux/hugetlb.h>
#include <linux/page_idle.h>
#include <linux/local_lock.h>
#include <linux/buffer_head.h>

#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/pagemap.h>

/* How many pages do we try to swap or page in/out together? As a power of 2 */
int page_cluster;
const int page_cluster_max = 31;

/* Protecting only lru_rotate.fbatch which requires disabling interrupts */
struct lru_rotate {
        local_lock_t lock;
        struct folio_batch fbatch;
};
static DEFINE_PER_CPU(struct lru_rotate, lru_rotate) = {
        .lock = INIT_LOCAL_LOCK(lock),
};

/*
 * The following folio batches are grouped together because they are protected
 * by disabling preemption (and interrupts remain enabled).
 */
struct cpu_fbatches {
        local_lock_t lock;
        struct folio_batch lru_add;
        struct folio_batch lru_deactivate_file;
        struct folio_batch lru_deactivate;
        struct folio_batch lru_lazyfree;
#ifdef CONFIG_SMP
        struct folio_batch activate;
#endif
};
static DEFINE_PER_CPU(struct cpu_fbatches, cpu_fbatches) = {
        .lock = INIT_LOCAL_LOCK(lock),
};

/*
 * This path almost never happens for VM activity - pages are normally freed
 * in batches.  But it gets used by networking - and for compound pages.
 */
static void __page_cache_release(struct folio *folio)
{
        if (folio_test_lru(folio)) {
                struct lruvec *lruvec;
                unsigned long flags;

                lruvec = folio_lruvec_lock_irqsave(folio, &flags);
                lruvec_del_folio(lruvec, folio);
                __folio_clear_lru_flags(folio);
                unlock_page_lruvec_irqrestore(lruvec, flags);
        }
        /* See comment on folio_test_mlocked in folios_put() */
        if (unlikely(folio_test_mlocked(folio))) {
                long nr_pages = folio_nr_pages(folio);

                __folio_clear_mlocked(folio);
                zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages);
                count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
        }
}

static void __folio_put_small(struct folio *folio)
{
        __page_cache_release(folio);
        mem_cgroup_uncharge(folio);
        free_unref_page(&folio->page, 0);
}

static void __folio_put_large(struct folio *folio)
{
        /*
         * __page_cache_release() is supposed to be called for thp, not for
         * hugetlb. This is because a hugetlb page never has PageLRU set
         * (it is never added to any LRU list) and no memcg routines should
         * be called for hugetlb (it has a separate hugetlb_cgroup.)
         */
        if (!folio_test_hugetlb(folio))
                __page_cache_release(folio);
        destroy_large_folio(folio);
}

void __folio_put(struct folio *folio)
{
        if (unlikely(folio_is_zone_device(folio)))
                free_zone_device_page(&folio->page);
        else if (unlikely(folio_test_large(folio)))
                __folio_put_large(folio);
        else
                __folio_put_small(folio);
}
EXPORT_SYMBOL(__folio_put);
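
/*
 * Illustrative sketch (not part of this file): __folio_put() is the slow
 * path taken once the last reference is gone.  A caller-side wrapper along
 * the lines of folio_put() in <linux/mm.h> is expected to look roughly like:
 *
 *	static inline void example_folio_put(struct folio *folio)
 *	{
 *		if (folio_put_testzero(folio))
 *			__folio_put(folio);
 *	}
 *
 * i.e. the refcount is dropped first, and only the final put reaches the
 * release code above.  (example_folio_put is a hypothetical name.)
 */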

/**
 * put_pages_list() - release a list of pages
 * @pages: list of pages threaded on page->lru
 *
 * Release a list of pages which are strung together on page.lru.
 */
void put_pages_list(struct list_head *pages)
{
        struct folio *folio, *next;

        list_for_each_entry_safe(folio, next, pages, lru) {
                if (!folio_put_testzero(folio)) {
                        list_del(&folio->lru);
                        continue;
                }
                if (folio_test_large(folio)) {
                        list_del(&folio->lru);
                        __folio_put_large(folio);
                        continue;
                }
                /* LRU flag must be clear because it's passed using the lru */
        }

        free_unref_page_list(pages);
        INIT_LIST_HEAD(pages);
}
EXPORT_SYMBOL(put_pages_list);
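
/*
 * Illustrative sketch (not part of this file): a caller typically collects
 * folios on a private list threaded through folio->lru and hands the whole
 * list over in one call, e.g.:
 *
 *	LIST_HEAD(folios_to_free);
 *
 *	list_add(&folio->lru, &folios_to_free);	// repeated for each folio
 *	...
 *	put_pages_list(&folios_to_free);	// list comes back reinitialised
 *
 * The folios must not be on any LRU list (the lru field carries the list
 * linkage), and the caller gives up its references to them.
 */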

typedef void (*move_fn_t)(struct lruvec *lruvec, struct folio *folio);

static void lru_add_fn(struct lruvec *lruvec, struct folio *folio)
{
        int was_unevictable = folio_test_clear_unevictable(folio);
        long nr_pages = folio_nr_pages(folio);

        VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);

        /*
         * Is an smp_mb__after_atomic() still required here, before
         * folio_evictable() tests the mlocked flag, to rule out the possibility
         * of stranding an evictable folio on an unevictable LRU?  I think
         * not, because __munlock_folio() only clears the mlocked flag
         * while the LRU lock is held.
         *
         * (That is not true of __page_cache_release(), and not necessarily
         * true of folios_put(): but those only clear the mlocked flag after
         * folio_put_testzero() has excluded any other users of the folio.)
         */
        if (folio_evictable(folio)) {
                if (was_unevictable)
                        __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
        } else {
                folio_clear_active(folio);
                folio_set_unevictable(folio);
                /*
                 * folio->mlock_count = !!folio_test_mlocked(folio)?
                 * But that leaves __mlock_folio() in doubt whether another
                 * actor has already counted the mlock or not.  Err on the
                 * safe side, underestimate, let page reclaim fix it, rather
                 * than leaving a page on the unevictable LRU indefinitely.
                 */
                folio->mlock_count = 0;
                if (!was_unevictable)
                        __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages);
        }

        lruvec_add_folio(lruvec, folio);
        trace_mm_lru_insertion(folio);
}

static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
{
        int i;
        struct lruvec *lruvec = NULL;
        unsigned long flags = 0;

        for (i = 0; i < folio_batch_count(fbatch); i++) {
                struct folio *folio = fbatch->folios[i];

                /* block memcg migration while the folio moves between lru */
                if (move_fn != lru_add_fn && !folio_test_clear_lru(folio))
                        continue;

                lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags);
                move_fn(lruvec, folio);

                folio_set_lru(folio);
        }

        if (lruvec)
                unlock_page_lruvec_irqrestore(lruvec, flags);
        folios_put(fbatch);
}

static void folio_batch_add_and_move(struct folio_batch *fbatch,
                struct folio *folio, move_fn_t move_fn)
{
        if (folio_batch_add(fbatch, folio) && !folio_test_large(folio) &&
            !lru_cache_disabled())
                return;
        folio_batch_move_lru(fbatch, move_fn);
}
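
/*
 * Illustrative sketch (not part of this file): every user of the per-CPU
 * batches below follows the same pattern - take a folio reference, add the
 * folio to the local batch under cpu_fbatches.lock, and let the batch flush
 * itself through folio_batch_move_lru() when it fills up:
 *
 *	folio_get(folio);
 *	local_lock(&cpu_fbatches.lock);
 *	fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);
 *	folio_batch_add_and_move(fbatch, folio, lru_add_fn);
 *	local_unlock(&cpu_fbatches.lock);
 *
 * The reference taken here is the one consumed by folios_put() at the end
 * of folio_batch_move_lru().
 */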

static void lru_move_tail_fn(struct lruvec *lruvec, struct folio *folio)
{
        if (!folio_test_unevictable(folio)) {
                lruvec_del_folio(lruvec, folio);
                folio_clear_active(folio);
                lruvec_add_folio_tail(lruvec, folio);
                __count_vm_events(PGROTATED, folio_nr_pages(folio));
        }
}

/*
 * Writeback is about to end against a folio which has been marked for
 * immediate reclaim.  If it still appears to be reclaimable, move it
 * to the tail of the inactive list.
 *
 * folio_rotate_reclaimable() must disable IRQs, to prevent nasty races.
 */
void folio_rotate_reclaimable(struct folio *folio)
{
        if (!folio_test_locked(folio) && !folio_test_dirty(folio) &&
            !folio_test_unevictable(folio) && folio_test_lru(folio)) {
                struct folio_batch *fbatch;
                unsigned long flags;

                folio_get(folio);
                local_lock_irqsave(&lru_rotate.lock, flags);
                fbatch = this_cpu_ptr(&lru_rotate.fbatch);
                folio_batch_add_and_move(fbatch, folio, lru_move_tail_fn);
                local_unlock_irqrestore(&lru_rotate.lock, flags);
        }
}
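
/*
 * Illustrative sketch (not part of this file): the expected caller is the
 * writeback completion path, roughly along the lines of:
 *
 *	if (folio_test_reclaim(folio)) {
 *		folio_clear_reclaim(folio);
 *		folio_rotate_reclaimable(folio);
 *	}
 *	// ... then clear the writeback flag and wake any waiters ...
 *
 * so a folio that was tagged for immediate reclaim is queued for the tail
 * of the inactive list as soon as its writeback finishes.
 */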

void lru_note_cost(struct lruvec *lruvec, bool file,
                   unsigned int nr_io, unsigned int nr_rotated)
{
        unsigned long cost;

        /*
         * Reflect the relative cost of incurring IO and spending CPU
         * time on rotations. This doesn't attempt to make a precise
         * comparison, it just says: if reloads are about comparable
         * between the LRU lists, or rotations are overwhelmingly
         * different between them, adjust scan balance for CPU work.
         */
        cost = nr_io * SWAP_CLUSTER_MAX + nr_rotated;

        do {
                unsigned long lrusize;

                /*
                 * Holding lruvec->lru_lock is safe here, since we are either
                 * 1) called on the pinned lruvec in reclaim, or
                 * 2) called from a pre-LRU page during refault (which also holds
                 *    the rcu lock, so would be safe even if the page was on the
                 *    LRU and could move simultaneously to a new lruvec).
                 */
                spin_lock_irq(&lruvec->lru_lock);
                /* Record cost event */
                if (file)
                        lruvec->file_cost += cost;
                else
                        lruvec->anon_cost += cost;

                /*
                 * Decay previous events
                 *
                 * Because workloads change over time (and to avoid
                 * overflow) we keep these statistics as a floating
                 * average, which ends up weighing recent refaults
                 * more than old ones.
                 */
                lrusize = lruvec_page_state(lruvec, NR_INACTIVE_ANON) +
                          lruvec_page_state(lruvec, NR_ACTIVE_ANON) +
                          lruvec_page_state(lruvec, NR_INACTIVE_FILE) +
                          lruvec_page_state(lruvec, NR_ACTIVE_FILE);

                if (lruvec->file_cost + lruvec->anon_cost > lrusize / 4) {
                        lruvec->file_cost /= 2;
                        lruvec->anon_cost /= 2;
                }
                spin_unlock_irq(&lruvec->lru_lock);
        } while ((lruvec = parent_lruvec(lruvec)));
}

void lru_note_cost_refault(struct folio *folio)
{
        lru_note_cost(folio_lruvec(folio), folio_is_file_lru(folio),
                      folio_nr_pages(folio), 0);
}
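
/*
 * Worked example (illustrative, assuming SWAP_CLUSTER_MAX == 32): a refault
 * of a single small file folio passes folio_nr_pages() == 1 as nr_io, so
 * lru_note_cost_refault() adds 1 * 32 + 0 = 32 to lruvec->file_cost, while
 * rotating 16 folios without any I/O would add only 0 * 32 + 16 = 16.
 * Once file_cost + anon_cost exceeds a quarter of the lruvec size, both are
 * halved, which is what turns these counters into a decaying average.
 */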

static void folio_activate_fn(struct lruvec *lruvec, struct folio *folio)
{
        if (!folio_test_active(folio) && !folio_test_unevictable(folio)) {
                long nr_pages = folio_nr_pages(folio);

                lruvec_del_folio(lruvec, folio);
                folio_set_active(folio);
                lruvec_add_folio(lruvec, folio);
                trace_mm_lru_activate(folio);

                __count_vm_events(PGACTIVATE, nr_pages);
                __count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE,
                                     nr_pages);
        }
}

#ifdef CONFIG_SMP
static void folio_activate_drain(int cpu)
{
        struct folio_batch *fbatch = &per_cpu(cpu_fbatches.activate, cpu);

        if (folio_batch_count(fbatch))
                folio_batch_move_lru(fbatch, folio_activate_fn);
}

void folio_activate(struct folio *folio)
{
        if (folio_test_lru(folio) && !folio_test_active(folio) &&
            !folio_test_unevictable(folio)) {
                struct folio_batch *fbatch;

                folio_get(folio);
                local_lock(&cpu_fbatches.lock);
                fbatch = this_cpu_ptr(&cpu_fbatches.activate);
                folio_batch_add_and_move(fbatch, folio, folio_activate_fn);
                local_unlock(&cpu_fbatches.lock);
        }
}

#else
static inline void folio_activate_drain(int cpu)
{
}

void folio_activate(struct folio *folio)
{
        struct lruvec *lruvec;

        if (folio_test_clear_lru(folio)) {
                lruvec = folio_lruvec_lock_irq(folio);
                folio_activate_fn(lruvec, folio);
                unlock_page_lruvec_irq(lruvec);
                folio_set_lru(folio);
        }
}
#endif

static void __lru_cache_activate_folio(struct folio *folio)
{
        struct folio_batch *fbatch;
        int i;

        local_lock(&cpu_fbatches.lock);
        fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);

        /*
         * Search backwards on the optimistic assumption that the folio being
         * activated has just been added to this batch. Note that only
         * the local batch is examined as a !LRU folio could be in the
         * process of being released, reclaimed, migrated or on a remote
         * batch that is currently being drained. Furthermore, marking
         * a remote batch's folio active potentially hits a race where
         * a folio is marked active just after it is added to the inactive
         * list causing accounting errors and BUG_ON checks to trigger.
         */
        for (i = folio_batch_count(fbatch) - 1; i >= 0; i--) {
                struct folio *batch_folio = fbatch->folios[i];

                if (batch_folio == folio) {
                        folio_set_active(folio);
                        break;
                }
        }

        local_unlock(&cpu_fbatches.lock);
}

#ifdef CONFIG_LRU_GEN
static void folio_inc_refs(struct folio *folio)
{
        unsigned long new_flags, old_flags = READ_ONCE(folio->flags);

        if (folio_test_unevictable(folio))
                return;

        if (!folio_test_referenced(folio)) {
                folio_set_referenced(folio);
                return;
        }

        if (!folio_test_workingset(folio)) {
                folio_set_workingset(folio);
                return;
        }

        /* see the comment on MAX_NR_TIERS */
        do {
                new_flags = old_flags & LRU_REFS_MASK;
                if (new_flags == LRU_REFS_MASK)
                        break;

                new_flags += BIT(LRU_REFS_PGOFF);
                new_flags |= old_flags & ~LRU_REFS_MASK;
        } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags));
}
#else
static void folio_inc_refs(struct folio *folio)
{
}
#endif /* CONFIG_LRU_GEN */

/*
 * Mark a page as having seen activity.
 *
 * inactive,unreferenced	->	inactive,referenced
 * inactive,referenced		->	active,unreferenced
 * active,unreferenced		->	active,referenced
 *
 * When a newly allocated page is not yet visible, so safe for non-atomic ops,
 * __SetPageReferenced(page) may be substituted for mark_page_accessed(page).
 */
void folio_mark_accessed(struct folio *folio)
{
        if (lru_gen_enabled()) {
                folio_inc_refs(folio);
                return;
        }

        if (!folio_test_referenced(folio)) {
                folio_set_referenced(folio);
        } else if (folio_test_unevictable(folio)) {
                /*
                 * Unevictable pages are on the "LRU_UNEVICTABLE" list. But,
                 * this list is never rotated or maintained, so marking an
                 * unevictable page accessed has no effect.
                 */
        } else if (!folio_test_active(folio)) {
                /*
                 * If the folio is on the LRU, queue it for activation via
                 * cpu_fbatches.activate. Otherwise, assume the folio is in a
                 * folio_batch, mark it active and it'll be moved to the active
                 * LRU on the next drain.
                 */
                if (folio_test_lru(folio))
                        folio_activate(folio);
                else
                        __lru_cache_activate_folio(folio);
                folio_clear_referenced(folio);
                workingset_activation(folio);
        }
        if (folio_test_idle(folio))
                folio_clear_idle(folio);
}
EXPORT_SYMBOL(folio_mark_accessed);
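
/*
 * Illustrative sketch (not part of this file): a typical caller is a second
 * or later access to a folio that is already in the page cache, e.g. on a
 * buffered read path (the lookup shown is only a hypothetical caller):
 *
 *	folio = filemap_get_folio(mapping, index);
 *	if (!IS_ERR(folio)) {
 *		// ... copy data out to the user buffer ...
 *		folio_mark_accessed(folio);
 *		folio_put(folio);
 *	}
 *
 * The first access only sets the referenced flag; repeated accesses promote
 * the folio towards the active list as described above.
 */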

/**
 * folio_add_lru - Add a folio to an LRU list.
 * @folio: The folio to be added to the LRU.
 *
 * Queue the folio for addition to the LRU. The decision on whether
 * to add the page to the [in]active [file|anon] list is deferred until the
 * folio_batch is drained. This gives a chance for the caller of folio_add_lru()
 * to have the folio added to the active list using folio_mark_accessed().
 */
void folio_add_lru(struct folio *folio)
{
        struct folio_batch *fbatch;

        VM_BUG_ON_FOLIO(folio_test_active(folio) &&
                        folio_test_unevictable(folio), folio);
        VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);

        /* see the comment in lru_gen_add_folio() */
        if (lru_gen_enabled() && !folio_test_unevictable(folio) &&
            lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
                folio_set_active(folio);

        folio_get(folio);
        local_lock(&cpu_fbatches.lock);
        fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);
        folio_batch_add_and_move(fbatch, folio, lru_add_fn);
        local_unlock(&cpu_fbatches.lock);
}
EXPORT_SYMBOL(folio_add_lru);

/**
 * folio_add_lru_vma() - Add a folio to the appropriate LRU list for this VMA.
 * @folio: The folio to be added to the LRU.
 * @vma: VMA in which the folio is mapped.
 *
 * If the VMA is mlocked, @folio is added to the unevictable list.
 * Otherwise, it is treated the same way as folio_add_lru().
 */
void folio_add_lru_vma(struct folio *folio, struct vm_area_struct *vma)
{
        VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);

        if (unlikely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED))
                mlock_new_folio(folio);
        else
                folio_add_lru(folio);
}

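/*
 * Illustrative sketch (not part of this file): a fault path that has just
 * charged and mapped a freshly allocated anonymous folio would hand it to
 * the LRU via the VMA-aware helper, roughly:
 *
 *	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, addr, false);
 *	// ... charge to the memcg, add the rmap, set the PTE ...
 *	folio_add_lru_vma(folio, vma);
 *
 * If the VMA is mlocked, the folio goes straight to the unevictable list
 * through mlock_new_folio() instead of the per-CPU lru_add batch.  (The
 * vma_alloc_folio() arguments are quoted from memory and may differ.)
 */
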
/*
 * If the folio cannot be invalidated, it is moved to the
 * inactive list to speed up its reclaim.  It is moved to the
 * head of the list, rather than the tail, to give the flusher
 * threads some time to write it out, as this is much more
 * effective than the single-page writeout from reclaim.
 *
 * If the folio isn't mapped and dirty/writeback, the folio
 * could be reclaimed asap using the reclaim flag.
 *
 * 1. active, mapped folio -> none
 * 2. active, dirty/writeback folio -> inactive, head, reclaim
 * 3. inactive, mapped folio -> none
 * 4. inactive, dirty/writeback folio -> inactive, head, reclaim
 * 5. inactive, clean -> inactive, tail
 * 6. Others -> none
 *
 * In 4, it moves to the head of the inactive list so the folio is
 * written out by flusher threads as this is much more efficient
 * than the single-page writeout from reclaim.
 */
static void lru_deactivate_file_fn(struct lruvec *lruvec, struct folio *folio)
{
        bool active = folio_test_active(folio);
        long nr_pages = folio_nr_pages(folio);

        if (folio_test_unevictable(folio))
                return;

        /* Some processes are using the folio */
        if (folio_mapped(folio))
                return;

        lruvec_del_folio(lruvec, folio);
        folio_clear_active(folio);
        folio_clear_referenced(folio);

        if (folio_test_writeback(folio) || folio_test_dirty(folio)) {
                /*
                 * Setting the reclaim flag could race with
                 * folio_end_writeback() and confuse readahead.  But the
                 * race window is _really_ small and it's not a critical
                 * problem.
                 */
                lruvec_add_folio(lruvec, folio);
                folio_set_reclaim(folio);
        } else {
                /*
                 * The folio's writeback ended while it was in the batch.
                 * We move that folio to the tail of the inactive list.
                 */
                lruvec_add_folio_tail(lruvec, folio);
                __count_vm_events(PGROTATED, nr_pages);
        }

        if (active) {
                __count_vm_events(PGDEACTIVATE, nr_pages);
                __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE,
                                     nr_pages);
        }
}

static void lru_deactivate_fn(struct lruvec *lruvec, struct folio *folio)
{
        if (!folio_test_unevictable(folio) && (folio_test_active(folio) || lru_gen_enabled())) {
                long nr_pages = folio_nr_pages(folio);

                lruvec_del_folio(lruvec, folio);
                folio_clear_active(folio);
                folio_clear_referenced(folio);
                lruvec_add_folio(lruvec, folio);

                __count_vm_events(PGDEACTIVATE, nr_pages);
                __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE,
                                     nr_pages);
        }
}

static void lru_lazyfree_fn(struct lruvec *lruvec, struct folio *folio)
{
        if (folio_test_anon(folio) && folio_test_swapbacked(folio) &&
            !folio_test_swapcache(folio) && !folio_test_unevictable(folio)) {
                long nr_pages = folio_nr_pages(folio);

                lruvec_del_folio(lruvec, folio);
                folio_clear_active(folio);
                folio_clear_referenced(folio);
                /*
                 * Lazyfree folios are clean anonymous folios.  They have
                 * the swapbacked flag cleared, to distinguish them from normal
                 * anonymous folios.
                 */
                folio_clear_swapbacked(folio);
                lruvec_add_folio(lruvec, folio);

                __count_vm_events(PGLAZYFREE, nr_pages);
                __count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE,
                                     nr_pages);
        }
}

/*
 * Drain pages out of the cpu's folio_batch.
 * Either "cpu" is the current CPU, and preemption has already been
 * disabled; or "cpu" is being hot-unplugged, and is already dead.
 */
void lru_add_drain_cpu(int cpu)
{
        struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu);
        struct folio_batch *fbatch = &fbatches->lru_add;

        if (folio_batch_count(fbatch))
                folio_batch_move_lru(fbatch, lru_add_fn);

        fbatch = &per_cpu(lru_rotate.fbatch, cpu);
        /* Disabling interrupts below acts as a compiler barrier. */
        if (data_race(folio_batch_count(fbatch))) {
                unsigned long flags;

                /* No harm done if a racing interrupt already did this */
                local_lock_irqsave(&lru_rotate.lock, flags);
                folio_batch_move_lru(fbatch, lru_move_tail_fn);
                local_unlock_irqrestore(&lru_rotate.lock, flags);
        }

        fbatch = &fbatches->lru_deactivate_file;
        if (folio_batch_count(fbatch))
                folio_batch_move_lru(fbatch, lru_deactivate_file_fn);

        fbatch = &fbatches->lru_deactivate;
        if (folio_batch_count(fbatch))
                folio_batch_move_lru(fbatch, lru_deactivate_fn);

        fbatch = &fbatches->lru_lazyfree;
        if (folio_batch_count(fbatch))
                folio_batch_move_lru(fbatch, lru_lazyfree_fn);

        folio_activate_drain(cpu);
}

/**
 * deactivate_file_folio() - Deactivate a file folio.
 * @folio: Folio to deactivate.
 *
 * This function hints to the VM that @folio is a good reclaim candidate,
 * for example if its invalidation fails due to the folio being dirty
 * or under writeback.
 *
 * Context: Caller holds a reference on the folio.
 */
void deactivate_file_folio(struct folio *folio)
{
        struct folio_batch *fbatch;

        /* Deactivating an unevictable folio will not accelerate reclaim */
        if (folio_test_unevictable(folio))
                return;

        folio_get(folio);
        local_lock(&cpu_fbatches.lock);
        fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate_file);
        folio_batch_add_and_move(fbatch, folio, lru_deactivate_file_fn);
        local_unlock(&cpu_fbatches.lock);
}

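/*
 * Illustrative sketch (not part of this file): the expected user is the
 * invalidation path, which falls back to deactivation when a folio cannot
 * be dropped because it is dirty, under writeback or still mapped:
 *
 *	if (!mapping_evict_folio(mapping, folio))
 *		deactivate_file_folio(folio);
 *
 * (mapping_evict_folio() is named here only for illustration; the point is
 * that a failed invalidation hints the folio towards reclaim rather than
 * leaving it where it was.)
 */
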
/*
 * folio_deactivate - deactivate a folio
 * @folio: folio to deactivate
 *
 * folio_deactivate() moves @folio to the inactive list if @folio was on the
 * active list and was not unevictable. This is done to accelerate the
 * reclaim of @folio.
 */
void folio_deactivate(struct folio *folio)
{
        if (folio_test_lru(folio) && !folio_test_unevictable(folio) &&
            (folio_test_active(folio) || lru_gen_enabled())) {
                struct folio_batch *fbatch;

                folio_get(folio);
                local_lock(&cpu_fbatches.lock);
                fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate);
                folio_batch_add_and_move(fbatch, folio, lru_deactivate_fn);
                local_unlock(&cpu_fbatches.lock);
        }
}

/**
 * folio_mark_lazyfree - make an anon folio lazyfree
 * @folio: folio to deactivate
 *
 * folio_mark_lazyfree() moves @folio to the inactive file list.
 * This is done to accelerate the reclaim of @folio.
 */
void folio_mark_lazyfree(struct folio *folio)
{
        if (folio_test_lru(folio) && folio_test_anon(folio) &&
            folio_test_swapbacked(folio) && !folio_test_swapcache(folio) &&
            !folio_test_unevictable(folio)) {
                struct folio_batch *fbatch;

                folio_get(folio);
                local_lock(&cpu_fbatches.lock);
                fbatch = this_cpu_ptr(&cpu_fbatches.lru_lazyfree);
                folio_batch_add_and_move(fbatch, folio, lru_lazyfree_fn);
                local_unlock(&cpu_fbatches.lock);
        }
}

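/*
 * Illustrative sketch (not part of this file): the typical caller is the
 * MADV_FREE handler, which walks the advised range and, after dropping any
 * swap cache entry and the dirty flag, marks each clean anonymous folio
 * lazyfree, roughly:
 *
 *	// ... drop the swap cache entry and clear the dirty flag ...
 *	folio_mark_lazyfree(folio);
 *
 * Reclaim can then discard such folios without swapping them out, because
 * the cleared swapbacked flag marks their contents as expendable.
 */
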
void lru_add_drain(void)
{
        local_lock(&cpu_fbatches.lock);
        lru_add_drain_cpu(smp_processor_id());
        local_unlock(&cpu_fbatches.lock);
        mlock_drain_local();
}

/*
 * It's called from per-cpu workqueue context in the SMP case, so
 * lru_add_drain_cpu() and invalidate_bh_lrus_cpu() should run on
 * the same cpu. It shouldn't be a problem in the !SMP case since
 * there is only one core and the locks will disable preemption.
 */
static void lru_add_and_bh_lrus_drain(void)
{
        local_lock(&cpu_fbatches.lock);
        lru_add_drain_cpu(smp_processor_id());
        local_unlock(&cpu_fbatches.lock);
        invalidate_bh_lrus_cpu();
        mlock_drain_local();
}

void lru_add_drain_cpu_zone(struct zone *zone)
{
        local_lock(&cpu_fbatches.lock);
        lru_add_drain_cpu(smp_processor_id());
        drain_local_pages(zone);
        local_unlock(&cpu_fbatches.lock);
        mlock_drain_local();
}

#ifdef CONFIG_SMP

static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);

static void lru_add_drain_per_cpu(struct work_struct *dummy)
{
        lru_add_and_bh_lrus_drain();
}

static bool cpu_needs_drain(unsigned int cpu)
{
        struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu);

        /* Check these in order of likelihood that they're not zero */
        return folio_batch_count(&fbatches->lru_add) ||
                data_race(folio_batch_count(&per_cpu(lru_rotate.fbatch, cpu))) ||
                folio_batch_count(&fbatches->lru_deactivate_file) ||
                folio_batch_count(&fbatches->lru_deactivate) ||
                folio_batch_count(&fbatches->lru_lazyfree) ||
                folio_batch_count(&fbatches->activate) ||
                need_mlock_drain(cpu) ||
                has_bh_in_lru(cpu, NULL);
}

/*
 * Doesn't need any cpu hotplug locking because we do rely on per-cpu
 * kworkers being shut down before our page_alloc_cpu_dead callback is
 * executed on the offlined cpu.
 * Calling this function with cpu hotplug locks held can actually lead
 * to obscure indirect dependencies via WQ context.
 */
static inline void __lru_add_drain_all(bool force_all_cpus)
{
        /*
         * lru_drain_gen - Global pages generation number
         *
         * (A) Definition: global lru_drain_gen = x implies that all generations
         *     0 < n <= x are already *scheduled* for draining.
         *
         * This is an optimization for the highly-contended use case where a
         * user space workload keeps constantly generating a flow of pages for
         * each CPU.
         */
        static unsigned int lru_drain_gen;
        static struct cpumask has_work;
        static DEFINE_MUTEX(lock);
        unsigned cpu, this_gen;

        /*
         * Make sure nobody triggers this path before mm_percpu_wq is fully
         * initialized.
         */
        if (WARN_ON(!mm_percpu_wq))
                return;

        /*
         * Guarantee folio_batch counter stores visible by this CPU
         * are visible to other CPUs before loading the current drain
         * generation.
         */
        smp_mb();

        /*
         * (B) Locally cache global LRU draining generation number
         *
         * The read barrier ensures that the counter is loaded before the mutex
         * is taken. It pairs with smp_mb() inside the mutex critical section
         * at (D).
         */
        this_gen = smp_load_acquire(&lru_drain_gen);

        mutex_lock(&lock);

        /*
         * (C) Exit the draining operation if a newer generation, from another
         * lru_add_drain_all(), was already scheduled for draining. Check (A).
         */
        if (unlikely(this_gen != lru_drain_gen && !force_all_cpus))
                goto done;

        /*
         * (D) Increment global generation number
         *
         * Pairs with smp_load_acquire() at (B), outside of the critical
         * section. Use a full memory barrier to guarantee that the
         * new global drain generation number is stored before loading
         * folio_batch counters.
         *
         * This pairing must be done here, before the for_each_online_cpu loop
         * below which drains the page vectors.
         *
         * Let x, y, and z represent some system CPU numbers, where x < y < z.
         * Assume CPU #z is in the middle of the for_each_online_cpu loop
         * below and has already reached CPU #y's per-cpu data. CPU #x comes
         * along, adds some pages to its per-cpu vectors, then calls
         * lru_add_drain_all().
         *
         * If the paired barrier is done at any later step, e.g. after the
         * loop, CPU #x will just exit at (C) and miss flushing out all of its
         * added pages.
         */
        WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1);
        smp_mb();

        cpumask_clear(&has_work);
        for_each_online_cpu(cpu) {
                struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);

                if (cpu_needs_drain(cpu)) {
                        INIT_WORK(work, lru_add_drain_per_cpu);
                        queue_work_on(cpu, mm_percpu_wq, work);
                        __cpumask_set_cpu(cpu, &has_work);
                }
        }

        for_each_cpu(cpu, &has_work)
                flush_work(&per_cpu(lru_add_drain_work, cpu));

done:
        mutex_unlock(&lock);
}

void lru_add_drain_all(void)
{
        __lru_add_drain_all(false);
}
#else
void lru_add_drain_all(void)
{
        lru_add_drain();
}
#endif /* CONFIG_SMP */

atomic_t lru_disable_count = ATOMIC_INIT(0);

/*
 * lru_cache_disable() needs to be called before we start compiling
 * a list of pages to be migrated using isolate_lru_page().
 * It drains pages from the LRU cache and then disables the cache on
 * all CPUs until lru_cache_enable() is called.
 *
 * Must be paired with a call to lru_cache_enable().
 */
void lru_cache_disable(void)
{
        atomic_inc(&lru_disable_count);
        /*
         * Readers of lru_disable_count are protected by either disabling
         * preemption or rcu_read_lock:
         *
         * preempt_disable, local_irq_disable  [bh_lru_lock()]
         * rcu_read_lock                       [rt_spin_lock CONFIG_PREEMPT_RT]
         * preempt_disable                     [local_lock !CONFIG_PREEMPT_RT]
         *
         * Since v5.1 kernel, synchronize_rcu() is guaranteed to wait on
         * preempt_disable() regions of code. So any CPU which sees
         * lru_disable_count = 0 will have exited the critical
         * section when synchronize_rcu() returns.
         */
        synchronize_rcu_expedited();
#ifdef CONFIG_SMP
        __lru_add_drain_all(true);
#else
        lru_add_and_bh_lrus_drain();
#endif
}

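/*
 * Illustrative sketch (not part of this file): lru_cache_disable() and
 * lru_cache_enable() bracket code that needs folios to go straight to the
 * LRU rather than sit in the per-CPU batches, e.g. a migration sequence:
 *
 *	lru_cache_disable();
 *	// ... isolate the target folios and migrate them ...
 *	lru_cache_enable();
 *
 * While the count is elevated, folio_batch_add_and_move() flushes every
 * addition immediately (see the lru_cache_disabled() check above), so no
 * folio can hide in a CPU-local batch with an extra reference held.
 */
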
/**
 * folios_put_refs - Reduce the reference count on a batch of folios.
 * @folios: The folios.
 * @refs: The number of refs to subtract from each folio.
 *
 * Like folio_put(), but for a batch of folios.  This is more efficient
 * than writing the loop yourself as it will optimise the locks which need
 * to be taken if the folios are freed.  The folios batch is returned
 * empty and ready to be reused for another batch; there is no need
 * to reinitialise it.  If @refs is NULL, we subtract one from each
 * folio refcount.
 *
 * Context: May be called in process or interrupt context, but not in NMI
 * context.  May be called while holding a spinlock.
 */
void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
{
        int i;
        LIST_HEAD(pages_to_free);
        struct lruvec *lruvec = NULL;
        unsigned long flags = 0;

        for (i = 0; i < folios->nr; i++) {
                struct folio *folio = folios->folios[i];
                unsigned int nr_refs = refs ? refs[i] : 1;

                if (is_huge_zero_page(&folio->page))
                        continue;

                if (folio_is_zone_device(folio)) {
                        if (lruvec) {
                                unlock_page_lruvec_irqrestore(lruvec, flags);
                                lruvec = NULL;
                        }
                        if (put_devmap_managed_page_refs(&folio->page, nr_refs))
                                continue;
                        if (folio_ref_sub_and_test(folio, nr_refs))
                                free_zone_device_page(&folio->page);
                        continue;
                }

                if (!folio_ref_sub_and_test(folio, nr_refs))
                        continue;

                if (folio_test_large(folio)) {
                        if (lruvec) {
                                unlock_page_lruvec_irqrestore(lruvec, flags);
                                lruvec = NULL;
                        }
                        __folio_put_large(folio);
                        continue;
                }

                if (folio_test_lru(folio)) {
                        lruvec = folio_lruvec_relock_irqsave(folio, lruvec,
                                                             &flags);
                        lruvec_del_folio(lruvec, folio);
                        __folio_clear_lru_flags(folio);
                }

                /*
                 * In rare cases, when truncation or holepunching raced with
                 * munlock after VM_LOCKED was cleared, Mlocked may still be
                 * found set here.  This does not indicate a problem, unless
                 * "unevictable_pgs_cleared" appears worryingly large.
                 */
                if (unlikely(folio_test_mlocked(folio))) {
                        __folio_clear_mlocked(folio);
                        zone_stat_sub_folio(folio, NR_MLOCK);
                        count_vm_event(UNEVICTABLE_PGCLEARED);
                }

                list_add(&folio->lru, &pages_to_free);
        }
        if (lruvec)
                unlock_page_lruvec_irqrestore(lruvec, flags);

        mem_cgroup_uncharge_list(&pages_to_free);
        free_unref_page_list(&pages_to_free);
        folio_batch_reinit(folios);
}
EXPORT_SYMBOL(folios_put_refs);

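/*
 * Illustrative sketch (not part of this file): folios_put(), used by
 * folio_batch_move_lru() above, is expected to be a thin wrapper that
 * passes a NULL @refs array, i.e. drops exactly one reference per folio:
 *
 *	static inline void example_folios_put(struct folio_batch *folios)
 *	{
 *		folios_put_refs(folios, NULL);
 *	}
 *
 * A caller that has accumulated folios in a batch can therefore release
 * them all with a single call and immediately reuse the (now empty) batch.
 * (example_folios_put is a hypothetical name.)
 */
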
/**
 * release_pages - batched put_page()
 * @arg: array of pages to release
 * @nr: number of pages
 *
 * Decrement the reference count on all the pages in @arg.  If it
 * fell to zero, remove the page from the LRU and free it.
 *
 * Note that the argument can be an array of pages, encoded pages,
 * or folio pointers. We ignore any encoded bits, and turn any of
 * them into just a folio that gets free'd.
 */
void release_pages(release_pages_arg arg, int nr)
{
        struct folio_batch fbatch;
        int refs[PAGEVEC_SIZE];
        struct encoded_page **encoded = arg.encoded_pages;
        int i;

        folio_batch_init(&fbatch);
        for (i = 0; i < nr; i++) {
                /* Turn any of the argument types into a folio */
                struct folio *folio = page_folio(encoded_page_ptr(encoded[i]));

                /* Is our next entry actually "nr_pages" -> "nr_refs" ? */
                refs[fbatch.nr] = 1;
                if (unlikely(encoded_page_flags(encoded[i]) &
                             ENCODED_PAGE_BIT_NR_PAGES_NEXT))
                        refs[fbatch.nr] = encoded_nr_pages(encoded[++i]);

                if (folio_batch_add(&fbatch, folio) > 0)
                        continue;
                folios_put_refs(&fbatch, refs);
        }

        if (fbatch.nr)
                folios_put_refs(&fbatch, refs);
}
EXPORT_SYMBOL(release_pages);
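
/*
 * Illustrative sketch (not part of this file): a caller holding an array
 * of plain struct page pointers can release them all in one go, e.g.:
 *
 *	struct page *pages[NR];		// NR and pages[] are hypothetical
 *
 *	release_pages(pages, NR);
 *
 * release_pages_arg is a transparent union, so arrays of struct page *,
 * struct folio * or struct encoded_page * are all accepted without casts;
 * internally everything is converted back to folios and fed to
 * folios_put_refs() in batches of up to PAGEVEC_SIZE folios.
 */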

/*
 * The folios which we're about to release may be in the deferred lru-addition
 * queues.  That would prevent them from really being freed right now.  That's
 * OK from a correctness point of view but is inefficient - those folios may be
 * cache-warm and we want to give them back to the page allocator ASAP.
 *
 * So __folio_batch_release() will drain those queues here.
 * folio_batch_move_lru() calls folios_put() directly to avoid
 * mutual recursion.
 */
void __folio_batch_release(struct folio_batch *fbatch)
{
        if (!fbatch->percpu_pvec_drained) {
                lru_add_drain();
                fbatch->percpu_pvec_drained = true;
        }
        release_pages(fbatch->folios, folio_batch_count(fbatch));
        folio_batch_reinit(fbatch);
}
EXPORT_SYMBOL(__folio_batch_release);
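
/*
 * Illustrative sketch (not part of this file): callers normally go through
 * folio_batch_release() from <linux/pagevec.h>, which is expected to be a
 * trivial count check in front of this function:
 *
 *	static inline void example_folio_batch_release(struct folio_batch *fbatch)
 *	{
 *		if (folio_batch_count(fbatch))
 *			__folio_batch_release(fbatch);
 *	}
 *
 * so an empty batch costs nothing and a non-empty one drains the per-CPU
 * lru_add queues at most once (tracked by percpu_pvec_drained).
 * (example_folio_batch_release is a hypothetical name.)
 */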

/**
 * folio_batch_remove_exceptionals() - Prune non-folios from a batch.
 * @fbatch: The batch to prune
 *
 * find_get_entries() fills a batch with both folios and shadow/swap/DAX
 * entries.  This function prunes all the non-folio entries from @fbatch
 * without leaving holes, so that it can be passed on to folio-only batch
 * operations.
 */
void folio_batch_remove_exceptionals(struct folio_batch *fbatch)
{
        unsigned int i, j;

        for (i = 0, j = 0; i < folio_batch_count(fbatch); i++) {
                struct folio *folio = fbatch->folios[i];
                if (!xa_is_value(folio))
                        fbatch->folios[j++] = folio;
        }
        fbatch->nr = j;
}

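/*
 * Illustrative sketch (not part of this file): truncation-style users fill
 * a batch from the page cache and then strip the value entries before doing
 * folio-only work, roughly:
 *
 *	while (find_get_entries(mapping, &index, end, &fbatch, indices)) {
 *		folio_batch_remove_exceptionals(&fbatch);
 *		// ... lock/unmap/free each fbatch.folios[i] ...
 *		folio_batch_release(&fbatch);
 *	}
 *
 * The exact find_get_entries() signature is quoted from memory and may
 * differ; the point is that shadow/swap/DAX entries never reach the folio
 * operations.
 */
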
/*
 * Perform any setup for the swap system
 */
void __init swap_setup(void)
{
        unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT);

        /* Use a smaller cluster for small-memory machines */
        if (megs < 16)
                page_cluster = 2;
        else
                page_cluster = 3;
        /*
         * Right now other parts of the system mean that we
         * _really_ don't want to cluster much more.
         */
}
1da177e4 1128}