X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=mm%2Fvmscan.c;h=a52167eabc967ff0475800279e6e64810d315881;hb=31483b6ad205784d3a82240865135bef5c97c105;hp=c4a2f4512fcaccf00e5ec6f41eddcdce0e18fb7d;hpb=446985428d2cd10efd5d139c33de16c50ee771ba;p=linux-2.6-block.git diff --git a/mm/vmscan.c b/mm/vmscan.c index c4a2f4512fca..a52167eabc96 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -84,6 +84,9 @@ struct scan_control { /* Scan (total_size >> priority) pages at once */ int priority; + /* The highest zone to isolate pages for reclaim from */ + enum zone_type reclaim_idx; + unsigned int may_writepage:1; /* Can mapped pages be reclaimed? */ @@ -191,26 +194,42 @@ static bool sane_reclaim(struct scan_control *sc) } #endif +/* + * This misses isolated pages which are not accounted for to save counters. + * As the data only determines if reclaim or compaction continues, it is + * not expected that isolated pages will be a dominating factor. + */ unsigned long zone_reclaimable_pages(struct zone *zone) { unsigned long nr; - nr = zone_page_state_snapshot(zone, NR_ACTIVE_FILE) + - zone_page_state_snapshot(zone, NR_INACTIVE_FILE) + - zone_page_state_snapshot(zone, NR_ISOLATED_FILE); + nr = zone_page_state_snapshot(zone, NR_ZONE_LRU_FILE); + if (get_nr_swap_pages() > 0) + nr += zone_page_state_snapshot(zone, NR_ZONE_LRU_ANON); + + return nr; +} + +unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat) +{ + unsigned long nr; + + nr = node_page_state_snapshot(pgdat, NR_ACTIVE_FILE) + + node_page_state_snapshot(pgdat, NR_INACTIVE_FILE) + + node_page_state_snapshot(pgdat, NR_ISOLATED_FILE); if (get_nr_swap_pages() > 0) - nr += zone_page_state_snapshot(zone, NR_ACTIVE_ANON) + - zone_page_state_snapshot(zone, NR_INACTIVE_ANON) + - zone_page_state_snapshot(zone, NR_ISOLATED_ANON); + nr += node_page_state_snapshot(pgdat, NR_ACTIVE_ANON) + + node_page_state_snapshot(pgdat, NR_INACTIVE_ANON) + + node_page_state_snapshot(pgdat, NR_ISOLATED_ANON); return nr; } -bool zone_reclaimable(struct zone *zone) +bool pgdat_reclaimable(struct pglist_data *pgdat) { - return zone_page_state_snapshot(zone, NR_PAGES_SCANNED) < - zone_reclaimable_pages(zone) * 6; + return node_page_state_snapshot(pgdat, NR_PAGES_SCANNED) < + pgdat_reclaimable_pages(pgdat) * 6; } unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru) @@ -218,7 +237,7 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru) if (!mem_cgroup_disabled()) return mem_cgroup_get_lru_size(lruvec, lru); - return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru); + return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru); } /* @@ -877,7 +896,7 @@ static void page_check_dirty_writeback(struct page *page, * shrink_page_list() returns the number of reclaimed pages */ static unsigned long shrink_page_list(struct list_head *page_list, - struct zone *zone, + struct pglist_data *pgdat, struct scan_control *sc, enum ttu_flags ttu_flags, unsigned long *ret_nr_dirty, @@ -917,7 +936,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto keep; VM_BUG_ON_PAGE(PageActive(page), page); - VM_BUG_ON_PAGE(page_zone(page) != zone, page); sc->nr_scanned++; @@ -996,7 +1014,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, /* Case 1 above */ if (current_is_kswapd() && PageReclaim(page) && - test_bit(ZONE_WRITEBACK, &zone->flags)) { + test_bit(PGDAT_WRITEBACK, &pgdat->flags)) { nr_immediate++; goto keep_locked; @@ -1055,8 +1073,14 @@ static unsigned long shrink_page_list(struct list_head *page_list, /* Adding to swap updated mapping */ mapping = page_mapping(page); + } else if (unlikely(PageTransHuge(page))) { + /* Split file THP */ + if (split_huge_page_to_list(page, page_list)) + goto keep_locked; } + VM_BUG_ON_PAGE(PageTransHuge(page), page); + /* * The page is mapped into the page tables of one or more * processes. Try to unmap it here. @@ -1086,7 +1110,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ if (page_is_file_cache(page) && (!current_is_kswapd() || - !test_bit(ZONE_DIRTY, &zone->flags))) { + !test_bit(PGDAT_DIRTY, &pgdat->flags))) { /* * Immediately reclaim when written back. * Similar in principal to deactivate_page() @@ -1254,17 +1278,17 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, list_for_each_entry_safe(page, next, page_list, lru) { if (page_is_file_cache(page) && !PageDirty(page) && - !isolated_balloon_page(page)) { + !__PageMovable(page)) { ClearPageActive(page); list_move(&page->lru, &clean_pages); } } - ret = shrink_page_list(&clean_pages, zone, &sc, + ret = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc, TTU_UNMAP|TTU_IGNORE_ACCESS, &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true); list_splice(&clean_pages, page_list); - mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret); + mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -ret); return ret; } @@ -1343,7 +1367,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) } /* - * zone->lru_lock is heavily contended. Some of the functions that + * zone_lru_lock is heavily contended. Some of the functions that * shrink the lists perform better by taking out a batch of pages * and working on them outside the LRU lock. * @@ -1369,7 +1393,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, { struct list_head *src = &lruvec->lists[lru]; unsigned long nr_taken = 0; - unsigned long scan; + unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 }; + unsigned long scan, nr_pages; + LIST_HEAD(pages_skipped); for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan && !list_empty(src); scan++) { @@ -1380,9 +1406,16 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, VM_BUG_ON_PAGE(!PageLRU(page), page); + if (page_zonenum(page) > sc->reclaim_idx) { + list_move(&page->lru, &pages_skipped); + continue; + } + switch (__isolate_lru_page(page, mode)) { case 0: - nr_taken += hpage_nr_pages(page); + nr_pages = hpage_nr_pages(page); + nr_taken += nr_pages; + nr_zone_taken[page_zonenum(page)] += nr_pages; list_move(&page->lru, dst); break; @@ -1396,9 +1429,25 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, } } + /* + * Splice any skipped pages to the start of the LRU list. Note that + * this disrupts the LRU order when reclaiming for lower zones but + * we cannot splice to the tail. If we did then the SWAP_CLUSTER_MAX + * scanning would soon rescan the same pages to skip and put the + * system at risk of premature OOM. + */ + if (!list_empty(&pages_skipped)) + list_splice(&pages_skipped, src); *nr_scanned = scan; trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan, nr_taken, mode, is_file_lru(lru)); + for (scan = 0; scan < MAX_NR_ZONES; scan++) { + nr_pages = nr_zone_taken[scan]; + if (!nr_pages) + continue; + + update_lru_size(lruvec, lru, scan, -nr_pages); + } return nr_taken; } @@ -1438,8 +1487,8 @@ int isolate_lru_page(struct page *page) struct zone *zone = page_zone(page); struct lruvec *lruvec; - spin_lock_irq(&zone->lru_lock); - lruvec = mem_cgroup_page_lruvec(page, zone); + spin_lock_irq(zone_lru_lock(zone)); + lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat); if (PageLRU(page)) { int lru = page_lru(page); get_page(page); @@ -1447,7 +1496,7 @@ int isolate_lru_page(struct page *page) del_page_from_lru_list(page, lruvec, lru); ret = 0; } - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irq(zone_lru_lock(zone)); } return ret; } @@ -1459,7 +1508,7 @@ int isolate_lru_page(struct page *page) * the LRU list will go small and be scanned faster than necessary, leading to * unnecessary swapping, thrashing and OOM. */ -static int too_many_isolated(struct zone *zone, int file, +static int too_many_isolated(struct pglist_data *pgdat, int file, struct scan_control *sc) { unsigned long inactive, isolated; @@ -1471,11 +1520,11 @@ static int too_many_isolated(struct zone *zone, int file, return 0; if (file) { - inactive = zone_page_state(zone, NR_INACTIVE_FILE); - isolated = zone_page_state(zone, NR_ISOLATED_FILE); + inactive = node_page_state(pgdat, NR_INACTIVE_FILE); + isolated = node_page_state(pgdat, NR_ISOLATED_FILE); } else { - inactive = zone_page_state(zone, NR_INACTIVE_ANON); - isolated = zone_page_state(zone, NR_ISOLATED_ANON); + inactive = node_page_state(pgdat, NR_INACTIVE_ANON); + isolated = node_page_state(pgdat, NR_ISOLATED_ANON); } /* @@ -1493,7 +1542,7 @@ static noinline_for_stack void putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) { struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; - struct zone *zone = lruvec_zone(lruvec); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); LIST_HEAD(pages_to_free); /* @@ -1506,13 +1555,13 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) VM_BUG_ON_PAGE(PageLRU(page), page); list_del(&page->lru); if (unlikely(!page_evictable(page))) { - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irq(&pgdat->lru_lock); putback_lru_page(page); - spin_lock_irq(&zone->lru_lock); + spin_lock_irq(&pgdat->lru_lock); continue; } - lruvec = mem_cgroup_page_lruvec(page, zone); + lruvec = mem_cgroup_page_lruvec(page, pgdat); SetPageLRU(page); lru = page_lru(page); @@ -1529,10 +1578,10 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) del_page_from_lru_list(page, lruvec, lru); if (unlikely(PageCompound(page))) { - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irq(&pgdat->lru_lock); mem_cgroup_uncharge(page); (*get_compound_page_dtor(page))(page); - spin_lock_irq(&zone->lru_lock); + spin_lock_irq(&pgdat->lru_lock); } else list_add(&page->lru, &pages_to_free); } @@ -1558,7 +1607,7 @@ static int current_may_throttle(void) } /* - * shrink_inactive_list() is a helper for shrink_zone(). It returns the number + * shrink_inactive_list() is a helper for shrink_node(). It returns the number * of reclaimed pages */ static noinline_for_stack unsigned long @@ -1576,10 +1625,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, unsigned long nr_immediate = 0; isolate_mode_t isolate_mode = 0; int file = is_file_lru(lru); - struct zone *zone = lruvec_zone(lruvec); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; - while (unlikely(too_many_isolated(zone, file, sc))) { + while (unlikely(too_many_isolated(pgdat, file, sc))) { congestion_wait(BLK_RW_ASYNC, HZ/10); /* We are about to die and free our memory. Return now. */ @@ -1594,48 +1643,45 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, if (!sc->may_writepage) isolate_mode |= ISOLATE_CLEAN; - spin_lock_irq(&zone->lru_lock); + spin_lock_irq(&pgdat->lru_lock); nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list, &nr_scanned, sc, isolate_mode, lru); - update_lru_size(lruvec, lru, -nr_taken); - __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); + __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); reclaim_stat->recent_scanned[file] += nr_taken; if (global_reclaim(sc)) { - __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); + __mod_node_page_state(pgdat, NR_PAGES_SCANNED, nr_scanned); if (current_is_kswapd()) - __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); + __count_vm_events(PGSCAN_KSWAPD, nr_scanned); else - __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned); + __count_vm_events(PGSCAN_DIRECT, nr_scanned); } - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irq(&pgdat->lru_lock); if (nr_taken == 0) return 0; - nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP, + nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, TTU_UNMAP, &nr_dirty, &nr_unqueued_dirty, &nr_congested, &nr_writeback, &nr_immediate, false); - spin_lock_irq(&zone->lru_lock); + spin_lock_irq(&pgdat->lru_lock); if (global_reclaim(sc)) { if (current_is_kswapd()) - __count_zone_vm_events(PGSTEAL_KSWAPD, zone, - nr_reclaimed); + __count_vm_events(PGSTEAL_KSWAPD, nr_reclaimed); else - __count_zone_vm_events(PGSTEAL_DIRECT, zone, - nr_reclaimed); + __count_vm_events(PGSTEAL_DIRECT, nr_reclaimed); } putback_inactive_pages(lruvec, &page_list); - __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); + __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irq(&pgdat->lru_lock); mem_cgroup_uncharge_list(&page_list); free_hot_cold_page_list(&page_list, true); @@ -1655,7 +1701,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * are encountered in the nr_immediate check below. */ if (nr_writeback && nr_writeback == nr_taken) - set_bit(ZONE_WRITEBACK, &zone->flags); + set_bit(PGDAT_WRITEBACK, &pgdat->flags); /* * Legacy memcg will stall in page writeback so avoid forcibly @@ -1667,16 +1713,16 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * backed by a congested BDI and wait_iff_congested will stall. */ if (nr_dirty && nr_dirty == nr_congested) - set_bit(ZONE_CONGESTED, &zone->flags); + set_bit(PGDAT_CONGESTED, &pgdat->flags); /* * If dirty pages are scanned that are not queued for IO, it * implies that flushers are not keeping up. In this case, flag - * the zone ZONE_DIRTY and kswapd will start writing pages from + * the pgdat PGDAT_DIRTY and kswapd will start writing pages from * reclaim context. */ if (nr_unqueued_dirty == nr_taken) - set_bit(ZONE_DIRTY, &zone->flags); + set_bit(PGDAT_DIRTY, &pgdat->flags); /* * If kswapd scans pages marked marked for immediate @@ -1695,9 +1741,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, */ if (!sc->hibernation_mode && !current_is_kswapd() && current_may_throttle()) - wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); + wait_iff_congested(pgdat, BLK_RW_ASYNC, HZ/10); - trace_mm_vmscan_lru_shrink_inactive(zone, nr_scanned, nr_reclaimed, + trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, + nr_scanned, nr_reclaimed, sc->priority, file); return nr_reclaimed; } @@ -1709,9 +1756,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * processes, from rmap. * * If the pages are mostly unmapped, the processing is fast and it is - * appropriate to hold zone->lru_lock across the whole operation. But if + * appropriate to hold zone_lru_lock across the whole operation. But if * the pages are mapped, the processing is slow (page_referenced()) so we - * should drop zone->lru_lock around each page. It's impossible to balance + * should drop zone_lru_lock around each page. It's impossible to balance * this, so instead we remove the pages from the LRU while processing them. * It is safe to rely on PG_active against the non-LRU pages in here because * nobody will play with that bit on a non-LRU page. @@ -1725,20 +1772,20 @@ static void move_active_pages_to_lru(struct lruvec *lruvec, struct list_head *pages_to_free, enum lru_list lru) { - struct zone *zone = lruvec_zone(lruvec); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); unsigned long pgmoved = 0; struct page *page; int nr_pages; while (!list_empty(list)) { page = lru_to_page(list); - lruvec = mem_cgroup_page_lruvec(page, zone); + lruvec = mem_cgroup_page_lruvec(page, pgdat); VM_BUG_ON_PAGE(PageLRU(page), page); SetPageLRU(page); nr_pages = hpage_nr_pages(page); - update_lru_size(lruvec, lru, nr_pages); + update_lru_size(lruvec, lru, page_zonenum(page), nr_pages); list_move(&page->lru, &lruvec->lists[lru]); pgmoved += nr_pages; @@ -1748,10 +1795,10 @@ static void move_active_pages_to_lru(struct lruvec *lruvec, del_page_from_lru_list(page, lruvec, lru); if (unlikely(PageCompound(page))) { - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irq(&pgdat->lru_lock); mem_cgroup_uncharge(page); (*get_compound_page_dtor(page))(page); - spin_lock_irq(&zone->lru_lock); + spin_lock_irq(&pgdat->lru_lock); } else list_add(&page->lru, pages_to_free); } @@ -1777,7 +1824,7 @@ static void shrink_active_list(unsigned long nr_to_scan, unsigned long nr_rotated = 0; isolate_mode_t isolate_mode = 0; int file = is_file_lru(lru); - struct zone *zone = lruvec_zone(lruvec); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); lru_add_drain(); @@ -1786,20 +1833,19 @@ static void shrink_active_list(unsigned long nr_to_scan, if (!sc->may_writepage) isolate_mode |= ISOLATE_CLEAN; - spin_lock_irq(&zone->lru_lock); + spin_lock_irq(&pgdat->lru_lock); nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, &nr_scanned, sc, isolate_mode, lru); - update_lru_size(lruvec, lru, -nr_taken); - __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); + __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); reclaim_stat->recent_scanned[file] += nr_taken; if (global_reclaim(sc)) - __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); - __count_zone_vm_events(PGREFILL, zone, nr_scanned); + __mod_node_page_state(pgdat, NR_PAGES_SCANNED, nr_scanned); + __count_vm_events(PGREFILL, nr_scanned); - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irq(&pgdat->lru_lock); while (!list_empty(&l_hold)) { cond_resched(); @@ -1844,7 +1890,7 @@ static void shrink_active_list(unsigned long nr_to_scan, /* * Move pages back to the lru list. */ - spin_lock_irq(&zone->lru_lock); + spin_lock_irq(&pgdat->lru_lock); /* * Count referenced pages from currently used mappings as rotated, * even though only some of them are actually re-activated. This @@ -1855,8 +1901,8 @@ static void shrink_active_list(unsigned long nr_to_scan, move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru); move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE); - __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); - spin_unlock_irq(&zone->lru_lock); + __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); + spin_unlock_irq(&pgdat->lru_lock); mem_cgroup_uncharge_list(&l_hold); free_hot_cold_page_list(&l_hold, true); @@ -1950,7 +1996,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; u64 fraction[2]; u64 denominator = 0; /* gcc */ - struct zone *zone = lruvec_zone(lruvec); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); unsigned long anon_prio, file_prio; enum scan_balance scan_balance; unsigned long anon, file; @@ -1971,7 +2017,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * well. */ if (current_is_kswapd()) { - if (!zone_reclaimable(zone)) + if (!pgdat_reclaimable(pgdat)) force_scan = true; if (!mem_cgroup_online(memcg)) force_scan = true; @@ -2017,14 +2063,24 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * anon pages. Try to detect this based on file LRU size. */ if (global_reclaim(sc)) { - unsigned long zonefile; - unsigned long zonefree; + unsigned long pgdatfile; + unsigned long pgdatfree; + int z; + unsigned long total_high_wmark = 0; + + pgdatfree = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); + pgdatfile = node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE); - zonefree = zone_page_state(zone, NR_FREE_PAGES); - zonefile = zone_page_state(zone, NR_ACTIVE_FILE) + - zone_page_state(zone, NR_INACTIVE_FILE); + for (z = 0; z < MAX_NR_ZONES; z++) { + struct zone *zone = &pgdat->node_zones[z]; + if (!populated_zone(zone)) + continue; - if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) { + total_high_wmark += high_wmark_pages(zone); + } + + if (unlikely(pgdatfile + pgdatfree <= total_high_wmark)) { scan_balance = SCAN_ANON; goto out; } @@ -2071,7 +2127,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) + lruvec_lru_size(lruvec, LRU_INACTIVE_FILE); - spin_lock_irq(&zone->lru_lock); + spin_lock_irq(&pgdat->lru_lock); if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { reclaim_stat->recent_scanned[0] /= 2; reclaim_stat->recent_rotated[0] /= 2; @@ -2092,7 +2148,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, fp = file_prio * (reclaim_stat->recent_scanned[1] + 1); fp /= reclaim_stat->recent_rotated[1] + 1; - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irq(&pgdat->lru_lock); fraction[0] = ap; fraction[1] = fp; @@ -2346,9 +2402,9 @@ static inline bool should_continue_reclaim(struct zone *zone, * inactive lists are large enough, continue reclaiming */ pages_for_compaction = (2UL << sc->order); - inactive_lru_pages = zone_page_state(zone, NR_INACTIVE_FILE); + inactive_lru_pages = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE); if (get_nr_swap_pages() > 0) - inactive_lru_pages += zone_page_state(zone, NR_INACTIVE_ANON); + inactive_lru_pages += node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) return true; @@ -2363,12 +2419,13 @@ static inline bool should_continue_reclaim(struct zone *zone, } } -static bool shrink_zone(struct zone *zone, struct scan_control *sc, - bool is_classzone) +static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, + enum zone_type classzone_idx) { struct reclaim_state *reclaim_state = current->reclaim_state; unsigned long nr_reclaimed, nr_scanned; bool reclaimable = false; + struct zone *zone = &pgdat->node_zones[classzone_idx]; do { struct mem_cgroup *root = sc->target_mem_cgroup; @@ -2400,7 +2457,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, shrink_zone_memcg(zone, memcg, sc, &lru_pages); zone_lru_pages += lru_pages; - if (memcg && is_classzone) + if (!global_reclaim(sc)) shrink_slab(sc->gfp_mask, zone_to_nid(zone), memcg, sc->nr_scanned - scanned, lru_pages); @@ -2431,7 +2488,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, * Shrink the slab caches in the same proportion that * the eligible LRU pages were scanned. */ - if (global_reclaim(sc) && is_classzone) + if (global_reclaim(sc)) shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL, sc->nr_scanned - nr_scanned, zone_lru_pages); @@ -2461,7 +2518,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, */ static inline bool compaction_ready(struct zone *zone, int order, int classzone_idx) { - unsigned long balance_gap, watermark; + unsigned long watermark; bool watermark_ok; /* @@ -2470,9 +2527,7 @@ static inline bool compaction_ready(struct zone *zone, int order, int classzone_ * there is a buffer of free pages available to give compaction * a reasonable chance of completing and allocating the page */ - balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP( - zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO)); - watermark = high_wmark_pages(zone) + balance_gap + (2UL << order); + watermark = high_wmark_pages(zone) + (2UL << order); watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, classzone_idx); /* @@ -2515,7 +2570,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; gfp_t orig_mask; - enum zone_type requested_highidx = gfp_zone(sc->gfp_mask); + enum zone_type classzone_idx; /* * If the number of buffer_heads in the machine exceeds the maximum @@ -2523,17 +2578,23 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) * highmem pages could be pinning lowmem pages storing buffer_heads */ orig_mask = sc->gfp_mask; - if (buffer_heads_over_limit) + if (buffer_heads_over_limit) { sc->gfp_mask |= __GFP_HIGHMEM; + sc->reclaim_idx = classzone_idx = gfp_zone(sc->gfp_mask); + } for_each_zone_zonelist_nodemask(zone, z, zonelist, - gfp_zone(sc->gfp_mask), sc->nodemask) { - enum zone_type classzone_idx; - + sc->reclaim_idx, sc->nodemask) { if (!populated_zone(zone)) continue; - classzone_idx = requested_highidx; + /* + * Note that reclaim_idx does not change as it is the highest + * zone reclaimed from which for empty zones is a no-op but + * classzone_idx is used by shrink_node to test if the slabs + * should be shrunk on a given node. + */ + classzone_idx = sc->reclaim_idx; while (!populated_zone(zone->zone_pgdat->node_zones + classzone_idx)) classzone_idx--; @@ -2548,7 +2609,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) continue; if (sc->priority != DEF_PRIORITY && - !zone_reclaimable(zone)) + !pgdat_reclaimable(zone->zone_pgdat)) continue; /* Let kswapd poll it */ /* @@ -2562,8 +2623,8 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) */ if (IS_ENABLED(CONFIG_COMPACTION) && sc->order > PAGE_ALLOC_COSTLY_ORDER && - zonelist_zone_idx(z) <= requested_highidx && - compaction_ready(zone, sc->order, requested_highidx)) { + zonelist_zone_idx(z) <= classzone_idx && + compaction_ready(zone, sc->order, classzone_idx)) { sc->compaction_ready = true; continue; } @@ -2583,7 +2644,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) /* need some check for avoid more shrink_zone() */ } - shrink_zone(zone, sc, zone_idx(zone) == classzone_idx); + shrink_node(zone->zone_pgdat, sc, classzone_idx); } /* @@ -2686,7 +2747,7 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) for (i = 0; i <= ZONE_NORMAL; i++) { zone = &pgdat->node_zones[i]; if (!populated_zone(zone) || - zone_reclaimable_pages(zone) == 0) + pgdat_reclaimable_pages(pgdat) == 0) continue; pfmemalloc_reserve += min_wmark_pages(zone); @@ -2809,6 +2870,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, struct scan_control sc = { .nr_to_reclaim = SWAP_CLUSTER_MAX, .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), + .reclaim_idx = gfp_zone(gfp_mask), .order = order, .nodemask = nodemask, .priority = DEF_PRIORITY, @@ -2848,6 +2910,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, .target_mem_cgroup = memcg, .may_writepage = !laptop_mode, .may_unmap = 1, + .reclaim_idx = MAX_NR_ZONES - 1, .may_swap = !noswap, }; unsigned long lru_pages; @@ -2886,6 +2949,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), + .reclaim_idx = MAX_NR_ZONES - 1, .target_mem_cgroup = memcg, .priority = DEF_PRIORITY, .may_writepage = !laptop_mode, @@ -2914,7 +2978,8 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, } #endif -static void age_active_anon(struct zone *zone, struct scan_control *sc) +static void age_active_anon(struct pglist_data *pgdat, + struct zone *zone, struct scan_control *sc) { struct mem_cgroup *memcg; @@ -2933,84 +2998,13 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc) } while (memcg); } -static bool zone_balanced(struct zone *zone, int order, bool highorder, - unsigned long balance_gap, int classzone_idx) +static bool zone_balanced(struct zone *zone, int order, int classzone_idx) { - unsigned long mark = high_wmark_pages(zone) + balance_gap; - - /* - * When checking from pgdat_balanced(), kswapd should stop and sleep - * when it reaches the high order-0 watermark and let kcompactd take - * over. Other callers such as wakeup_kswapd() want to determine the - * true high-order watermark. - */ - if (IS_ENABLED(CONFIG_COMPACTION) && !highorder) { - mark += (1UL << order); - order = 0; - } + unsigned long mark = high_wmark_pages(zone); return zone_watermark_ok_safe(zone, order, mark, classzone_idx); } -/* - * pgdat_balanced() is used when checking if a node is balanced. - * - * For order-0, all zones must be balanced! - * - * For high-order allocations only zones that meet watermarks and are in a - * zone allowed by the callers classzone_idx are added to balanced_pages. The - * total of balanced pages must be at least 25% of the zones allowed by - * classzone_idx for the node to be considered balanced. Forcing all zones to - * be balanced for high orders can cause excessive reclaim when there are - * imbalanced zones. - * The choice of 25% is due to - * o a 16M DMA zone that is balanced will not balance a zone on any - * reasonable sized machine - * o On all other machines, the top zone must be at least a reasonable - * percentage of the middle zones. For example, on 32-bit x86, highmem - * would need to be at least 256M for it to be balance a whole node. - * Similarly, on x86-64 the Normal zone would need to be at least 1G - * to balance a node on its own. These seemed like reasonable ratios. - */ -static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx) -{ - unsigned long managed_pages = 0; - unsigned long balanced_pages = 0; - int i; - - /* Check the watermark levels */ - for (i = 0; i <= classzone_idx; i++) { - struct zone *zone = pgdat->node_zones + i; - - if (!populated_zone(zone)) - continue; - - managed_pages += zone->managed_pages; - - /* - * A special case here: - * - * balance_pgdat() skips over all_unreclaimable after - * DEF_PRIORITY. Effectively, it considers them balanced so - * they must be considered balanced here as well! - */ - if (!zone_reclaimable(zone)) { - balanced_pages += zone->managed_pages; - continue; - } - - if (zone_balanced(zone, order, false, 0, i)) - balanced_pages += zone->managed_pages; - else if (!order) - return false; - } - - if (order) - return balanced_pages >= (managed_pages >> 2); - else - return true; -} - /* * Prepare kswapd for sleeping. This verifies that there are no processes * waiting in throttle_direct_reclaim() and that watermarks have been met. @@ -3020,6 +3014,8 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx) static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, int classzone_idx) { + int i; + /* If a direct reclaimer woke kswapd within HZ/10, it's premature */ if (remaining) return false; @@ -3040,91 +3036,82 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, if (waitqueue_active(&pgdat->pfmemalloc_wait)) wake_up_all(&pgdat->pfmemalloc_wait); - return pgdat_balanced(pgdat, order, classzone_idx); + for (i = 0; i <= classzone_idx; i++) { + struct zone *zone = pgdat->node_zones + i; + + if (!populated_zone(zone)) + continue; + + if (zone_balanced(zone, order, classzone_idx)) + return true; + } + + return false; } /* - * kswapd shrinks the zone by the number of pages required to reach - * the high watermark. + * kswapd shrinks a node of pages that are at or below the highest usable + * zone that is currently unbalanced. * * Returns true if kswapd scanned at least the requested number of pages to * reclaim or if the lack of progress was due to pages under writeback. * This is used to determine if the scanning priority needs to be raised. */ -static bool kswapd_shrink_zone(struct zone *zone, +static bool kswapd_shrink_node(pg_data_t *pgdat, int classzone_idx, struct scan_control *sc) { - unsigned long balance_gap; - bool lowmem_pressure; + struct zone *zone; + int z; - /* Reclaim above the high watermark. */ - sc->nr_to_reclaim = max(SWAP_CLUSTER_MAX, high_wmark_pages(zone)); + /* Reclaim a number of pages proportional to the number of zones */ + sc->nr_to_reclaim = 0; + for (z = 0; z <= classzone_idx; z++) { + zone = pgdat->node_zones + z; + if (!populated_zone(zone)) + continue; - /* - * We put equal pressure on every zone, unless one zone has way too - * many pages free already. The "too many pages" is defined as the - * high wmark plus a "gap" where the gap is either the low - * watermark or 1% of the zone, whichever is smaller. - */ - balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP( - zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO)); + sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX); + } /* - * If there is no low memory pressure or the zone is balanced then no - * reclaim is necessary + * Historically care was taken to put equal pressure on all zones but + * now pressure is applied based on node LRU order. */ - lowmem_pressure = (buffer_heads_over_limit && is_highmem(zone)); - if (!lowmem_pressure && zone_balanced(zone, sc->order, false, - balance_gap, classzone_idx)) - return true; - - shrink_zone(zone, sc, zone_idx(zone) == classzone_idx); - - clear_bit(ZONE_WRITEBACK, &zone->flags); + shrink_node(pgdat, sc, classzone_idx); /* - * If a zone reaches its high watermark, consider it to be no longer - * congested. It's possible there are dirty pages backed by congested - * BDIs but as pressure is relieved, speculatively avoid congestion - * waits. + * Fragmentation may mean that the system cannot be rebalanced for + * high-order allocations. If twice the allocation size has been + * reclaimed then recheck watermarks only at order-0 to prevent + * excessive reclaim. Assume that a process requested a high-order + * can direct reclaim/compact. */ - if (zone_reclaimable(zone) && - zone_balanced(zone, sc->order, false, 0, classzone_idx)) { - clear_bit(ZONE_CONGESTED, &zone->flags); - clear_bit(ZONE_DIRTY, &zone->flags); - } + if (sc->order && sc->nr_reclaimed >= 2UL << sc->order) + sc->order = 0; return sc->nr_scanned >= sc->nr_to_reclaim; } /* - * For kswapd, balance_pgdat() will work across all this node's zones until - * they are all at high_wmark_pages(zone). + * For kswapd, balance_pgdat() will reclaim pages across a node from zones + * that are eligible for use by the caller until at least one zone is + * balanced. * - * Returns the highest zone idx kswapd was reclaiming at - * - * There is special handling here for zones which are full of pinned pages. - * This can happen if the pages are all mlocked, or if they are all used by - * device drivers (say, ZONE_DMA). Or if they are all in use by hugetlb. - * What we do is to detect the case where all pages in the zone have been - * scanned twice and there has been zero successful reclaim. Mark the zone as - * dead and from now on, only perform a short scan. Basically we're polling - * the zone for when the problem goes away. + * Returns the order kswapd finished reclaiming at. * * kswapd scans the zones in the highmem->normal->dma direction. It skips * zones which have free_pages > high_wmark_pages(zone), but once a zone is - * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the - * lower zones regardless of the number of free pages in the lower zones. This - * interoperates with the page allocator fallback scheme to ensure that aging - * of pages is balanced across the zones. + * found to have free_pages <= high_wmark_pages(zone), any page is that zone + * or lower is eligible for reclaim until at least one usable zone is + * balanced. */ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) { int i; - int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; + struct zone *zone; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .order = order, @@ -3132,6 +3119,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) .may_writepage = !laptop_mode, .may_unmap = 1, .may_swap = 1, + .reclaim_idx = classzone_idx, }; count_vm_event(PAGEOUTRUN); @@ -3140,26 +3128,12 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) sc.nr_reclaimed = 0; - /* - * Scan in the highmem->dma direction for the highest - * zone which needs scanning - */ - for (i = pgdat->nr_zones - 1; i >= 0; i--) { - struct zone *zone = pgdat->node_zones + i; - + /* Scan from the highest requested zone to dma */ + for (i = classzone_idx; i >= 0; i--) { + zone = pgdat->node_zones + i; if (!populated_zone(zone)) continue; - if (sc.priority != DEF_PRIORITY && - !zone_reclaimable(zone)) - continue; - - /* - * Do some background aging of the anon list, to give - * pages a chance to be referenced before reclaiming. - */ - age_active_anon(zone, &sc); - /* * If the number of buffer_heads in the machine * exceeds the maximum allowed level and this node @@ -3167,72 +3141,55 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * it to relieve lowmem pressure. */ if (buffer_heads_over_limit && is_highmem_idx(i)) { - end_zone = i; + classzone_idx = i; break; } - if (!zone_balanced(zone, order, false, 0, 0)) { - end_zone = i; + if (!zone_balanced(zone, order, 0)) { + classzone_idx = i; break; } else { /* - * If balanced, clear the dirty and congested - * flags + * If any eligible zone is balanced then the + * node is not considered congested or dirty. */ - clear_bit(ZONE_CONGESTED, &zone->flags); - clear_bit(ZONE_DIRTY, &zone->flags); + clear_bit(PGDAT_CONGESTED, &zone->zone_pgdat->flags); + clear_bit(PGDAT_DIRTY, &zone->zone_pgdat->flags); } } if (i < 0) goto out; + /* + * Do some background aging of the anon list, to give + * pages a chance to be referenced before reclaiming. All + * pages are rotated regardless of classzone as this is + * about consistent aging. + */ + age_active_anon(pgdat, &pgdat->node_zones[MAX_NR_ZONES - 1], &sc); + /* * If we're getting trouble reclaiming, start doing writepage * even in laptop mode. */ - if (sc.priority < DEF_PRIORITY - 2) + if (sc.priority < DEF_PRIORITY - 2 || !pgdat_reclaimable(pgdat)) sc.may_writepage = 1; + /* Call soft limit reclaim before calling shrink_node. */ + sc.nr_scanned = 0; + nr_soft_scanned = 0; + nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, sc.order, + sc.gfp_mask, &nr_soft_scanned); + sc.nr_reclaimed += nr_soft_reclaimed; + /* - * Now scan the zone in the dma->highmem direction, stopping - * at the last zone which needs scanning. - * - * We do this because the page allocator works in the opposite - * direction. This prevents the page allocator from allocating - * pages behind kswapd's direction of progress, which would - * cause too much scanning of the lower zones. + * There should be no need to raise the scanning priority if + * enough pages are already being scanned that that high + * watermark would be met at 100% efficiency. */ - for (i = 0; i <= end_zone; i++) { - struct zone *zone = pgdat->node_zones + i; - - if (!populated_zone(zone)) - continue; - - if (sc.priority != DEF_PRIORITY && - !zone_reclaimable(zone)) - continue; - - sc.nr_scanned = 0; - - nr_soft_scanned = 0; - /* - * Call soft limit reclaim before calling shrink_zone. - */ - nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, - order, sc.gfp_mask, - &nr_soft_scanned); - sc.nr_reclaimed += nr_soft_reclaimed; - - /* - * There should be no need to raise the scanning - * priority if enough pages are already being scanned - * that that high watermark would be met at 100% - * efficiency. - */ - if (kswapd_shrink_zone(zone, end_zone, &sc)) - raise_priority = false; - } + if (kswapd_shrink_node(pgdat, classzone_idx, &sc)) + raise_priority = false; /* * If the low watermark is met there is no need for processes @@ -3247,21 +3204,38 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) if (try_to_freeze() || kthread_should_stop()) break; + /* + * Stop reclaiming if any eligible zone is balanced and clear + * node writeback or congested. + */ + for (i = 0; i <= classzone_idx; i++) { + zone = pgdat->node_zones + i; + if (!populated_zone(zone)) + continue; + + if (zone_balanced(zone, sc.order, classzone_idx)) { + clear_bit(PGDAT_CONGESTED, &pgdat->flags); + clear_bit(PGDAT_DIRTY, &pgdat->flags); + goto out; + } + } + /* * Raise priority if scanning rate is too low or there was no * progress in reclaiming pages */ if (raise_priority || !sc.nr_reclaimed) sc.priority--; - } while (sc.priority >= 1 && - !pgdat_balanced(pgdat, order, classzone_idx)); + } while (sc.priority >= 1); out: /* - * Return the highest zone idx we were reclaiming at so - * prepare_kswapd_sleep() makes the same decisions as here. + * Return the order kswapd stopped reclaiming at as + * prepare_kswapd_sleep() takes it into account. If another caller + * entered the allocator slow path while kswapd was awake, order will + * remain at the higher level. */ - return end_zone; + return sc.order; } static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, @@ -3418,8 +3392,9 @@ static int kswapd(void *p) */ if (!ret) { trace_mm_vmscan_kswapd_wake(pgdat->node_id, order); - balanced_classzone_idx = balance_pgdat(pgdat, order, - classzone_idx); + + /* return value ignored until next patch */ + balance_pgdat(pgdat, order, classzone_idx); } } @@ -3449,7 +3424,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) } if (!waitqueue_active(&pgdat->kswapd_wait)) return; - if (zone_balanced(zone, order, true, 0, 0)) + if (zone_balanced(zone, order, 0)) return; trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order); @@ -3471,6 +3446,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) struct scan_control sc = { .nr_to_reclaim = nr_to_reclaim, .gfp_mask = GFP_HIGHUSER_MOVABLE, + .reclaim_idx = MAX_NR_ZONES - 1, .priority = DEF_PRIORITY, .may_writepage = 1, .may_unmap = 1, @@ -3606,8 +3582,8 @@ int sysctl_min_slab_ratio = 5; static inline unsigned long zone_unmapped_file_pages(struct zone *zone) { unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED); - unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) + - zone_page_state(zone, NR_ACTIVE_FILE); + unsigned long file_lru = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) + + node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE); /* * It's possible for there to be more file mapped pages than @@ -3662,6 +3638,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), .may_unmap = !!(zone_reclaim_mode & RECLAIM_UNMAP), .may_swap = 1, + .reclaim_idx = zone_idx(zone), }; cond_resched(); @@ -3681,7 +3658,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * priorities until we have enough memory freed. */ do { - shrink_zone(zone, &sc, true); + shrink_node(zone->zone_pgdat, &sc, zone_idx(zone)); } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); } @@ -3710,7 +3687,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages) return ZONE_RECLAIM_FULL; - if (!zone_reclaimable(zone)) + if (!pgdat_reclaimable(zone->zone_pgdat)) return ZONE_RECLAIM_FULL; /* @@ -3785,11 +3762,11 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) pagezone = page_zone(page); if (pagezone != zone) { if (zone) - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irq(zone_lru_lock(zone)); zone = pagezone; - spin_lock_irq(&zone->lru_lock); + spin_lock_irq(zone_lru_lock(zone)); } - lruvec = mem_cgroup_page_lruvec(page, zone); + lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat); if (!PageLRU(page) || !PageUnevictable(page)) continue; @@ -3808,7 +3785,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) if (zone) { __count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued); __count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned); - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irq(zone_lru_lock(zone)); } } #endif /* CONFIG_SHMEM */