{
unsigned long nr;
- nr = zone_page_state_snapshot(zone, NR_ZONE_LRU_FILE);
+ nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) +
+ zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE);
if (get_nr_swap_pages() > 0)
- nr += zone_page_state_snapshot(zone, NR_ZONE_LRU_ANON);
+ nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) +
+ zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON);
return nr;
}
ClearPageReclaim(page);
}
trace_mm_vmscan_writepage(page);
- inc_zone_page_state(page, NR_VMSCAN_WRITE);
+ inc_node_page_state(page, NR_VMSCAN_WRITE);
return PAGE_SUCCESS;
}
* except we already have the page isolated
* and know it's dirty
*/
- inc_zone_page_state(page, NR_VMSCAN_IMMEDIATE);
+ inc_node_page_state(page, NR_VMSCAN_IMMEDIATE);
SetPageReclaim(page);
goto keep_locked;
return ret;
}
+
+/*
+ * Update LRU sizes after isolating pages. The LRU size updates must
+ * be complete before mem_cgroup_update_lru_size due to a santity check.
+ */
+static __always_inline void update_lru_sizes(struct lruvec *lruvec,
+ enum lru_list lru, unsigned long *nr_zone_taken,
+ unsigned long nr_taken)
+{
+ int zid;
+
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+ if (!nr_zone_taken[zid])
+ continue;
+
+ __update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]);
+ }
+
+#ifdef CONFIG_MEMCG
+ mem_cgroup_update_lru_size(lruvec, lru, -nr_taken);
+#endif
+}
+
/*
* zone_lru_lock is heavily contended. Some of the functions that
* shrink the lists perform better by taking out a batch of pages
struct list_head *src = &lruvec->lists[lru];
unsigned long nr_taken = 0;
unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
+ unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
unsigned long scan, nr_pages;
LIST_HEAD(pages_skipped);
for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
- !list_empty(src); scan++) {
+ !list_empty(src);) {
struct page *page;
page = lru_to_page(src);
if (page_zonenum(page) > sc->reclaim_idx) {
list_move(&page->lru, &pages_skipped);
+ nr_skipped[page_zonenum(page)]++;
continue;
}
+ /*
+ * Account for scanned and skipped separetly to avoid the pgdat
+ * being prematurely marked unreclaimable by pgdat_reclaimable.
+ */
+ scan++;
+
switch (__isolate_lru_page(page, mode)) {
case 0:
nr_pages = hpage_nr_pages(page);
* scanning would soon rescan the same pages to skip and put the
* system at risk of premature OOM.
*/
- if (!list_empty(&pages_skipped))
+ if (!list_empty(&pages_skipped)) {
+ int zid;
+ unsigned long total_skipped = 0;
+
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+ if (!nr_skipped[zid])
+ continue;
+
+ __count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]);
+ total_skipped += nr_skipped[zid];
+ }
+
+ /*
+ * Account skipped pages as a partial scan as the pgdat may be
+ * close to unreclaimable. If the LRU list is empty, account
+ * skipped pages as a full scan.
+ */
+ scan += list_empty(src) ? total_skipped : total_skipped >> 2;
+
list_splice(&pages_skipped, src);
+ }
*nr_scanned = scan;
- trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan,
+ trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan,
nr_taken, mode, is_file_lru(lru));
- for (scan = 0; scan < MAX_NR_ZONES; scan++) {
- nr_pages = nr_zone_taken[scan];
- if (!nr_pages)
- continue;
-
- update_lru_size(lruvec, lru, scan, -nr_pages);
- }
+ update_lru_sizes(lruvec, lru, nr_zone_taken, nr_taken);
return nr_taken;
}
bdi_write_congested(current->backing_dev_info);
}
+static bool inactive_reclaimable_pages(struct lruvec *lruvec,
+ struct scan_control *sc, enum lru_list lru)
+{
+ int zid;
+ struct zone *zone;
+ int file = is_file_lru(lru);
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+
+ if (!global_reclaim(sc))
+ return true;
+
+ for (zid = sc->reclaim_idx; zid >= 0; zid--) {
+ zone = &pgdat->node_zones[zid];
+ if (!populated_zone(zone))
+ continue;
+
+ if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE +
+ LRU_FILE * file) >= SWAP_CLUSTER_MAX)
+ return true;
+ }
+
+ return false;
+}
+
/*
* shrink_inactive_list() is a helper for shrink_node(). It returns the number
* of reclaimed pages
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
+ if (!inactive_reclaimable_pages(lruvec, sc, lru))
+ return 0;
+
while (unlikely(too_many_isolated(pgdat, file, sc))) {
congestion_wait(BLK_RW_ASYNC, HZ/10);
* 1TB 101 10GB
* 10TB 320 32GB
*/
-static bool inactive_list_is_low(struct lruvec *lruvec, bool file)
+static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
+ struct scan_control *sc)
{
unsigned long inactive_ratio;
unsigned long inactive;
unsigned long active;
unsigned long gb;
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+ int zid;
/*
* If we don't have swap space, anonymous page deactivation
inactive = lruvec_lru_size(lruvec, file * LRU_FILE);
active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE);
+ /*
+ * For zone-constrained allocations, it is necessary to check if
+ * deactivations are required for lowmem to be reclaimed. This
+ * calculates the inactive/active pages available in eligible zones.
+ */
+ for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) {
+ struct zone *zone = &pgdat->node_zones[zid];
+ unsigned long inactive_zone, active_zone;
+
+ if (!populated_zone(zone))
+ continue;
+
+ inactive_zone = zone_page_state(zone,
+ NR_ZONE_LRU_BASE + (file * LRU_FILE));
+ active_zone = zone_page_state(zone,
+ NR_ZONE_LRU_BASE + (file * LRU_FILE) + LRU_ACTIVE);
+
+ inactive -= min(inactive, inactive_zone);
+ active -= min(active, active_zone);
+ }
+
gb = (inactive + active) >> (30 - PAGE_SHIFT);
if (gb)
inactive_ratio = int_sqrt(10 * gb);
struct lruvec *lruvec, struct scan_control *sc)
{
if (is_active_lru(lru)) {
- if (inactive_list_is_low(lruvec, is_file_lru(lru)))
+ if (inactive_list_is_low(lruvec, is_file_lru(lru), sc))
shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
* lruvec even if it has plenty of old anonymous pages unless the
* system is under heavy pressure.
*/
- if (!inactive_list_is_low(lruvec, true) &&
+ if (!inactive_list_is_low(lruvec, true, sc) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_list_is_low(lruvec, false))
+ if (inactive_list_is_low(lruvec, false, sc))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
return true;
}
-static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
- enum zone_type classzone_idx)
+static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
{
struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long nr_reclaimed, nr_scanned;
shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
node_lru_pages += lru_pages;
- if (!global_reclaim(sc))
+ if (memcg)
shrink_slab(sc->gfp_mask, pgdat->node_id,
memcg, sc->nr_scanned - scanned,
lru_pages);
* Returns true if compaction should go ahead for a high-order request, or
* the high-order allocation would succeed without compaction.
*/
-static inline bool compaction_ready(struct zone *zone, int order, int classzone_idx)
+static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
{
unsigned long watermark;
bool watermark_ok;
* there is a buffer of free pages available to give compaction
* a reasonable chance of completing and allocating the page
*/
- watermark = high_wmark_pages(zone) + (2UL << order);
- watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, classzone_idx);
+ watermark = high_wmark_pages(zone) + (2UL << sc->order);
+ watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx);
/*
* If compaction is deferred, reclaim up to a point where
* compaction will have a chance of success when re-enabled
*/
- if (compaction_deferred(zone, order))
+ if (compaction_deferred(zone, sc->order))
return watermark_ok;
/*
* If compaction is not ready to start and allocation is not likely
* to succeed without it, then keep reclaiming.
*/
- if (compaction_suitable(zone, order, 0, classzone_idx) == COMPACT_SKIPPED)
+ if (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx) == COMPACT_SKIPPED)
return false;
return watermark_ok;
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
gfp_t orig_mask;
- enum zone_type classzone_idx;
pg_data_t *last_pgdat = NULL;
/*
orig_mask = sc->gfp_mask;
if (buffer_heads_over_limit) {
sc->gfp_mask |= __GFP_HIGHMEM;
- sc->reclaim_idx = classzone_idx = gfp_zone(sc->gfp_mask);
+ sc->reclaim_idx = gfp_zone(sc->gfp_mask);
}
for_each_zone_zonelist_nodemask(zone, z, zonelist,
sc->reclaim_idx, sc->nodemask) {
- if (!populated_zone(zone))
- continue;
-
- /*
- * Note that reclaim_idx does not change as it is the highest
- * zone reclaimed from which for empty zones is a no-op but
- * classzone_idx is used by shrink_node to test if the slabs
- * should be shrunk on a given node.
- */
- classzone_idx = sc->reclaim_idx;
- while (!populated_zone(zone->zone_pgdat->node_zones +
- classzone_idx))
- classzone_idx--;
-
/*
* Take care memory controller reclaiming has small influence
* to global LRU.
*/
if (IS_ENABLED(CONFIG_COMPACTION) &&
sc->order > PAGE_ALLOC_COSTLY_ORDER &&
- zonelist_zone_idx(z) <= classzone_idx &&
- compaction_ready(zone, sc->order, classzone_idx)) {
+ compaction_ready(zone, sc)) {
sc->compaction_ready = true;
continue;
}
if (zone->zone_pgdat == last_pgdat)
continue;
last_pgdat = zone->zone_pgdat;
- shrink_node(zone->zone_pgdat, sc, classzone_idx);
+ shrink_node(zone->zone_pgdat, sc);
}
/*
delayacct_freepages_start();
if (global_reclaim(sc))
- count_vm_event(ALLOCSTALL);
+ __count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
do {
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
trace_mm_vmscan_direct_reclaim_begin(order,
sc.may_writepage,
- gfp_mask);
+ gfp_mask,
+ sc.reclaim_idx);
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
sc.may_writepage,
- sc.gfp_mask);
+ sc.gfp_mask,
+ sc.reclaim_idx);
/*
* NOTE: Although we can get the priority field, using it
trace_mm_vmscan_memcg_reclaim_begin(0,
sc.may_writepage,
- sc.gfp_mask);
+ sc.gfp_mask,
+ sc.reclaim_idx);
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
do {
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
- if (inactive_list_is_low(lruvec, false))
+ if (inactive_list_is_low(lruvec, false, sc))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
*
* Returns true if kswapd is ready to sleep
*/
-static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
- int classzone_idx)
+static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
{
int i;
- /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
- if (remaining)
- return false;
-
/*
* The throttled processes are normally woken up in balance_pgdat() as
* soon as pfmemalloc_watermark_ok() is true. But there is a potential
* This is used to determine if the scanning priority needs to be raised.
*/
static bool kswapd_shrink_node(pg_data_t *pgdat,
- int classzone_idx,
struct scan_control *sc)
{
struct zone *zone;
/* Reclaim a number of pages proportional to the number of zones */
sc->nr_to_reclaim = 0;
- for (z = 0; z <= classzone_idx; z++) {
+ for (z = 0; z <= sc->reclaim_idx; z++) {
zone = pgdat->node_zones + z;
if (!populated_zone(zone))
continue;
* Historically care was taken to put equal pressure on all zones but
* now pressure is applied based on node LRU order.
*/
- shrink_node(pgdat, sc, classzone_idx);
+ shrink_node(pgdat, sc);
/*
* Fragmentation may mean that the system cannot be rebalanced for
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = 1,
- .reclaim_idx = classzone_idx,
};
count_vm_event(PAGEOUTRUN);
bool raise_priority = true;
sc.nr_reclaimed = 0;
+ sc.reclaim_idx = classzone_idx;
/*
- * If the number of buffer_heads in the machine exceeds the
- * maximum allowed level then reclaim from all zones. This is
- * not specific to highmem as highmem may not exist but it is
- * it is expected that buffer_heads are stripped in writeback.
+ * If the number of buffer_heads exceeds the maximum allowed
+ * then consider reclaiming from all zones. This has a dual
+ * purpose -- on 64-bit systems it is expected that
+ * buffer_heads are stripped during active rotation. On 32-bit
+ * systems, highmem pages can pin lowmem memory and shrinking
+ * buffers can relieve lowmem pressure. Reclaim may still not
+ * go ahead if all eligible zones for the original allocation
+ * request are balanced to avoid excessive reclaim from kswapd.
*/
if (buffer_heads_over_limit) {
for (i = MAX_NR_ZONES - 1; i >= 0; i--) {
if (!populated_zone(zone))
continue;
- classzone_idx = i;
+ sc.reclaim_idx = i;
break;
}
}
* Scanning from low to high zone would allow congestion to be
* cleared during a very small window when a small low
* zone was balanced even under extreme pressure when the
- * overall node may be congested.
+ * overall node may be congested. Note that sc.reclaim_idx
+ * is not used as buffer_heads_over_limit may have adjusted
+ * it.
*/
for (i = classzone_idx; i >= 0; i--) {
zone = pgdat->node_zones + i;
* enough pages are already being scanned that that high
* watermark would be met at 100% efficiency.
*/
- if (kswapd_shrink_node(pgdat, classzone_idx, &sc))
+ if (kswapd_shrink_node(pgdat, &sc))
raise_priority = false;
/*
prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
/* Try to sleep for a short interval */
- if (prepare_kswapd_sleep(pgdat, reclaim_order, remaining, classzone_idx)) {
+ if (prepare_kswapd_sleep(pgdat, reclaim_order, classzone_idx)) {
/*
* Compaction records what page blocks it recently failed to
* isolate pages from and skips them in the future scanning.
* After a short sleep, check if it was a premature sleep. If not, then
* go fully to sleep until explicitly woken up.
*/
- if (prepare_kswapd_sleep(pgdat, reclaim_order, remaining, classzone_idx)) {
+ if (!remaining &&
+ prepare_kswapd_sleep(pgdat, reclaim_order, classzone_idx)) {
trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
/*
* but kcompactd is woken to compact for the original
* request (alloc_order).
*/
- trace_mm_vmscan_kswapd_wake(pgdat->node_id, alloc_order);
+ trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx,
+ alloc_order);
reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
if (reclaim_order < alloc_order)
goto kswapd_try_sleep;
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
{
pg_data_t *pgdat;
+ int z;
if (!populated_zone(zone))
return;
pgdat->kswapd_order = max(pgdat->kswapd_order, order);
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
- if (zone_balanced(zone, order, 0))
- return;
+
+ /* Only wake kswapd if all zones are unbalanced */
+ for (z = 0; z <= classzone_idx; z++) {
+ zone = pgdat->node_zones + z;
+ if (!populated_zone(zone))
+ continue;
+
+ if (zone_balanced(zone, order, classzone_idx))
+ return;
+ }
trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
wake_up_interruptible(&pgdat->kswapd_wait);
#ifdef CONFIG_NUMA
/*
- * Zone reclaim mode
+ * Node reclaim mode
*
- * If non-zero call zone_reclaim when the number of free pages falls below
+ * If non-zero call node_reclaim when the number of free pages falls below
* the watermarks.
*/
-int zone_reclaim_mode __read_mostly;
+int node_reclaim_mode __read_mostly;
#define RECLAIM_OFF 0
#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */
#define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */
/*
- * Priority for ZONE_RECLAIM. This determines the fraction of pages
+ * Priority for NODE_RECLAIM. This determines the fraction of pages
* of a node considered for each zone_reclaim. 4 scans 1/16th of
* a zone.
*/
-#define ZONE_RECLAIM_PRIORITY 4
+#define NODE_RECLAIM_PRIORITY 4
/*
- * Percentage of pages in a zone that must be unmapped for zone_reclaim to
+ * Percentage of pages in a zone that must be unmapped for node_reclaim to
* occur.
*/
int sysctl_min_unmapped_ratio = 1;
*/
int sysctl_min_slab_ratio = 5;
-static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
+static inline unsigned long node_unmapped_file_pages(struct pglist_data *pgdat)
{
- unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
- unsigned long file_lru = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) +
- node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE);
+ unsigned long file_mapped = node_page_state(pgdat, NR_FILE_MAPPED);
+ unsigned long file_lru = node_page_state(pgdat, NR_INACTIVE_FILE) +
+ node_page_state(pgdat, NR_ACTIVE_FILE);
/*
* It's possible for there to be more file mapped pages than
}
/* Work out how many page cache pages we can reclaim in this reclaim_mode */
-static unsigned long zone_pagecache_reclaimable(struct zone *zone)
+static unsigned long node_pagecache_reclaimable(struct pglist_data *pgdat)
{
unsigned long nr_pagecache_reclaimable;
unsigned long delta = 0;
/*
* If RECLAIM_UNMAP is set, then all file pages are considered
* potentially reclaimable. Otherwise, we have to worry about
- * pages like swapcache and zone_unmapped_file_pages() provides
+ * pages like swapcache and node_unmapped_file_pages() provides
* a better estimate
*/
- if (zone_reclaim_mode & RECLAIM_UNMAP)
- nr_pagecache_reclaimable = zone_page_state(zone, NR_FILE_PAGES);
+ if (node_reclaim_mode & RECLAIM_UNMAP)
+ nr_pagecache_reclaimable = node_page_state(pgdat, NR_FILE_PAGES);
else
- nr_pagecache_reclaimable = zone_unmapped_file_pages(zone);
+ nr_pagecache_reclaimable = node_unmapped_file_pages(pgdat);
/* If we can't clean pages, remove dirty pages from consideration */
- if (!(zone_reclaim_mode & RECLAIM_WRITE))
- delta += zone_page_state(zone, NR_FILE_DIRTY);
+ if (!(node_reclaim_mode & RECLAIM_WRITE))
+ delta += node_page_state(pgdat, NR_FILE_DIRTY);
/* Watch for any possible underflows due to delta */
if (unlikely(delta > nr_pagecache_reclaimable))
}
/*
- * Try to free up some pages from this zone through reclaim.
+ * Try to free up some pages from this node through reclaim.
*/
-static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
{
/* Minimum pages needed in order to stay on node */
const unsigned long nr_pages = 1 << order;
struct task_struct *p = current;
struct reclaim_state reclaim_state;
+ int classzone_idx = gfp_zone(gfp_mask);
struct scan_control sc = {
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
.order = order,
- .priority = ZONE_RECLAIM_PRIORITY,
- .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
- .may_unmap = !!(zone_reclaim_mode & RECLAIM_UNMAP),
+ .priority = NODE_RECLAIM_PRIORITY,
+ .may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE),
+ .may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
.may_swap = 1,
- .reclaim_idx = zone_idx(zone),
+ .reclaim_idx = classzone_idx,
};
cond_resched();
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
- if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
+ if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
/*
* Free memory by calling shrink zone with increasing
* priorities until we have enough memory freed.
*/
do {
- shrink_node(zone->zone_pgdat, &sc, zone_idx(zone));
+ shrink_node(pgdat, &sc);
} while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
}
return sc.nr_reclaimed >= nr_pages;
}
-int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
{
- int node_id;
int ret;
/*
- * Zone reclaim reclaims unmapped file backed pages and
+ * Node reclaim reclaims unmapped file backed pages and
* slab pages if we are over the defined limits.
*
* A small portion of unmapped file backed pages is needed for
* file I/O otherwise pages read by file I/O will be immediately
- * thrown out if the zone is overallocated. So we do not reclaim
- * if less than a specified percentage of the zone is used by
+ * thrown out if the node is overallocated. So we do not reclaim
+ * if less than a specified percentage of the node is used by
* unmapped file backed pages.
*/
- if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
- zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
- return ZONE_RECLAIM_FULL;
+ if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages &&
+ sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
+ return NODE_RECLAIM_FULL;
- if (!pgdat_reclaimable(zone->zone_pgdat))
- return ZONE_RECLAIM_FULL;
+ if (!pgdat_reclaimable(pgdat))
+ return NODE_RECLAIM_FULL;
/*
* Do not scan if the allocation should not be delayed.
*/
if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC))
- return ZONE_RECLAIM_NOSCAN;
+ return NODE_RECLAIM_NOSCAN;
/*
- * Only run zone reclaim on the local zone or on zones that do not
+ * Only run node reclaim on the local node or on nodes that do not
* have associated processors. This will favor the local processor
* over remote processors and spread off node memory allocations
* as wide as possible.
*/
- node_id = zone_to_nid(zone);
- if (node_state(node_id, N_CPU) && node_id != numa_node_id())
- return ZONE_RECLAIM_NOSCAN;
+ if (node_state(pgdat->node_id, N_CPU) && pgdat->node_id != numa_node_id())
+ return NODE_RECLAIM_NOSCAN;
- if (test_and_set_bit(ZONE_RECLAIM_LOCKED, &zone->flags))
- return ZONE_RECLAIM_NOSCAN;
+ if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags))
+ return NODE_RECLAIM_NOSCAN;
- ret = __zone_reclaim(zone, gfp_mask, order);
- clear_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
+ ret = __node_reclaim(pgdat, gfp_mask, order);
+ clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags);
if (!ret)
count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
void check_move_unevictable_pages(struct page **pages, int nr_pages)
{
struct lruvec *lruvec;
- struct zone *zone = NULL;
+ struct pglist_data *pgdat = NULL;
int pgscanned = 0;
int pgrescued = 0;
int i;
for (i = 0; i < nr_pages; i++) {
struct page *page = pages[i];
- struct zone *pagezone;
+ struct pglist_data *pagepgdat = page_pgdat(page);
pgscanned++;
- pagezone = page_zone(page);
- if (pagezone != zone) {
- if (zone)
- spin_unlock_irq(zone_lru_lock(zone));
- zone = pagezone;
- spin_lock_irq(zone_lru_lock(zone));
+ if (pagepgdat != pgdat) {
+ if (pgdat)
+ spin_unlock_irq(&pgdat->lru_lock);
+ pgdat = pagepgdat;
+ spin_lock_irq(&pgdat->lru_lock);
}
- lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
+ lruvec = mem_cgroup_page_lruvec(page, pgdat);
if (!PageLRU(page) || !PageUnevictable(page))
continue;
}
}
- if (zone) {
+ if (pgdat) {
__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
- spin_unlock_irq(zone_lru_lock(zone));
+ spin_unlock_irq(&pgdat->lru_lock);
}
}
#endif /* CONFIG_SHMEM */