}
#endif
+/*
+ * Estimate the number of reclaimable LRU pages in @zone from snapshots of
+ * its per-zone counters: inactive and active file pages are always counted,
+ * inactive and active anon pages only when get_nr_swap_pages() is positive.
+ * Returns the sum of the counters that were included.
+ *
+ * This misses isolated pages which are not accounted for to save counters.
+ * As the data only determines if reclaim or compaction continues, it is
+ * not expected that isolated pages will be a dominating factor.
+ */
+unsigned long zone_reclaimable_pages(struct zone *zone)
+{
+ unsigned long nr;
+
+ nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) +
+ zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE);
+ if (get_nr_swap_pages() > 0) /* presumably: anon is only reclaimable while swap remains */
+ nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) +
+ zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON);
+
+ return nr;
+}
+
unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat)
{
unsigned long nr;
enum lru_list lru, unsigned long *nr_zone_taken,
unsigned long nr_taken)
{
-#ifdef CONFIG_HIGHMEM
int zid;
- /*
- * Highmem has separate accounting for highmem pages so each zone
- * is updated separately.
- */
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
if (!nr_zone_taken[zid])
continue;
__update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]);
}
-#else
- /* Zone ID does not matter on !HIGHMEM */
- __update_lru_size(lruvec, lru, 0, -nr_taken);
-#endif
#ifdef CONFIG_MEMCG
mem_cgroup_update_lru_size(lruvec, lru, -nr_taken);
LIST_HEAD(pages_skipped);
for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
- !list_empty(src); scan++) {
+ !list_empty(src);) {
struct page *page;
page = lru_to_page(src);
continue;
}
+ /*
+ * Account for scanned and skipped separately to avoid the pgdat
+ * being prematurely marked unreclaimable by pgdat_reclaimable.
+ */
+ scan++;
+
switch (__isolate_lru_page(page, mode)) {
case 0:
nr_pages = hpage_nr_pages(page);
*/
if (!list_empty(&pages_skipped)) {
int zid;
+ unsigned long total_skipped = 0;
- list_splice(&pages_skipped, src);
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
if (!nr_skipped[zid])
continue;
__count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]);
+ total_skipped += nr_skipped[zid];
}
+
+ /*
+ * Account skipped pages as a partial scan as the pgdat may be
+ * close to unreclaimable. If the LRU list is empty, account
+ * skipped pages as a full scan.
+ */
+ scan += list_empty(src) ? total_skipped : total_skipped >> 2;
+
+ list_splice(&pages_skipped, src);
}
*nr_scanned = scan;
trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan,
bdi_write_congested(current->backing_dev_info);
}
+static bool inactive_reclaimable_pages(struct lruvec *lruvec,
+ struct scan_control *sc, enum lru_list lru)
+{ /*
+ * Returns true when reclaim is not global, or when at least one populated
+ * zone with an index at or below sc->reclaim_idx has a snapshot of the
+ * per-zone counter NR_ZONE_LRU_BASE + LRU_FILE * file (file or anon side,
+ * chosen from @lru) of at least SWAP_CLUSTER_MAX. Returns false when no
+ * such zone is found.
+ */
+ int zid;
+ struct zone *zone;
+ int file = is_file_lru(lru); /* used below to pick the file vs. anon counter */
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+
+ if (!global_reclaim(sc)) /* the per-zone check only applies to global reclaim */
+ return true;
+
+ for (zid = sc->reclaim_idx; zid >= 0; zid--) { /* walk from reclaim_idx down to zone 0 */
+ zone = &pgdat->node_zones[zid];
+ if (!populated_zone(zone))
+ continue;
+
+ if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE +
+ LRU_FILE * file) >= SWAP_CLUSTER_MAX)
+ return true; /* one zone with enough pages is sufficient */
+ }
+
+ return false;
+}
+
/*
* shrink_inactive_list() is a helper for shrink_node(). It returns the number
* of reclaimed pages
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
+ if (!inactive_reclaimable_pages(lruvec, sc, lru))
+ return 0;
+
while (unlikely(too_many_isolated(pgdat, file, sc))) {
congestion_wait(BLK_RW_ASYNC, HZ/10);
* 1TB 101 10GB
* 10TB 320 32GB
*/
-static bool inactive_list_is_low(struct lruvec *lruvec, bool file)
+static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
+ struct scan_control *sc)
{
unsigned long inactive_ratio;
unsigned long inactive;
unsigned long active;
unsigned long gb;
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+ int zid;
/*
* If we don't have swap space, anonymous page deactivation
inactive = lruvec_lru_size(lruvec, file * LRU_FILE);
active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE);
+ /*
+ * For zone-constrained allocations, it is necessary to check if
+ * deactivations are required for lowmem to be reclaimed. This
+ * calculates the inactive/active pages available in eligible zones.
+ */
+ for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) {
+ struct zone *zone = &pgdat->node_zones[zid];
+ unsigned long inactive_zone, active_zone;
+
+ if (!populated_zone(zone))
+ continue;
+
+ inactive_zone = zone_page_state(zone,
+ NR_ZONE_LRU_BASE + (file * LRU_FILE));
+ active_zone = zone_page_state(zone,
+ NR_ZONE_LRU_BASE + (file * LRU_FILE) + LRU_ACTIVE);
+
+ inactive -= min(inactive, inactive_zone);
+ active -= min(active, active_zone);
+ }
+
gb = (inactive + active) >> (30 - PAGE_SHIFT);
if (gb)
inactive_ratio = int_sqrt(10 * gb);
struct lruvec *lruvec, struct scan_control *sc)
{
if (is_active_lru(lru)) {
- if (inactive_list_is_low(lruvec, is_file_lru(lru)))
+ if (inactive_list_is_low(lruvec, is_file_lru(lru), sc))
shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
* lruvec even if it has plenty of old anonymous pages unless the
* system is under heavy pressure.
*/
- if (!inactive_list_is_low(lruvec, true) &&
+ if (!inactive_list_is_low(lruvec, true, sc) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_list_is_low(lruvec, false))
+ if (inactive_list_is_low(lruvec, false, sc))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
node_lru_pages += lru_pages;
- if (!global_reclaim(sc))
+ if (memcg)
shrink_slab(sc->gfp_mask, pgdat->node_id,
memcg, sc->nr_scanned - scanned,
lru_pages);
for_each_zone_zonelist_nodemask(zone, z, zonelist,
sc->reclaim_idx, sc->nodemask) {
- if (!populated_zone(zone))
- continue;
-
/*
* Take care memory controller reclaiming has small influence
* to global LRU.
do {
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
- if (inactive_list_is_low(lruvec, false))
+ if (inactive_list_is_low(lruvec, false, sc))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
void check_move_unevictable_pages(struct page **pages, int nr_pages)
{
struct lruvec *lruvec;
- struct zone *zone = NULL;
+ struct pglist_data *pgdat = NULL;
int pgscanned = 0;
int pgrescued = 0;
int i;
for (i = 0; i < nr_pages; i++) {
struct page *page = pages[i];
- struct zone *pagezone;
+ struct pglist_data *pagepgdat = page_pgdat(page);
pgscanned++;
- pagezone = page_zone(page);
- if (pagezone != zone) {
- if (zone)
- spin_unlock_irq(zone_lru_lock(zone));
- zone = pagezone;
- spin_lock_irq(zone_lru_lock(zone));
+ if (pagepgdat != pgdat) {
+ if (pgdat)
+ spin_unlock_irq(&pgdat->lru_lock);
+ pgdat = pagepgdat;
+ spin_lock_irq(&pgdat->lru_lock);
}
- lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
+ lruvec = mem_cgroup_page_lruvec(page, pgdat);
if (!PageLRU(page) || !PageUnevictable(page))
continue;
}
}
- if (zone) {
+ if (pgdat) {
__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
- spin_unlock_irq(zone_lru_lock(zone));
+ spin_unlock_irq(&pgdat->lru_lock);
}
}
#endif /* CONFIG_SHMEM */