X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=mm%2Fvmscan.c;h=374d95d0417856b096d902d40ff7cc29d4021b2e;hb=a71e36045e1fd5813addad2fce878d96e2827d66;hp=b3f5b359280d9a200e23fd26a1c9a2c0d396e3f1;hpb=7ee36a14f06cc937f6b2c2932c2e48f590970581;p=linux-2.6-block.git

diff --git a/mm/vmscan.c b/mm/vmscan.c
index b3f5b359280d..374d95d04178 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -194,6 +194,24 @@ static bool sane_reclaim(struct scan_control *sc)
 }
 #endif
 
+/*
+ * This misses isolated pages which are not accounted for to save counters.
+ * As the data only determines if reclaim or compaction continues, it is
+ * not expected that isolated pages will be a dominating factor.
+ */
+unsigned long zone_reclaimable_pages(struct zone *zone)
+{
+	unsigned long nr;
+
+	nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) +
+		zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE);
+	if (get_nr_swap_pages() > 0)
+		nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) +
+			zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON);
+
+	return nr;
+}
+
 unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat)
 {
 	unsigned long nr;
@@ -1359,23 +1377,14 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
 			enum lru_list lru, unsigned long *nr_zone_taken,
 			unsigned long nr_taken)
 {
-#ifdef CONFIG_HIGHMEM
 	int zid;
 
-	/*
-	 * Highmem has separate accounting for highmem pages so each zone
-	 * is updated separately.
-	 */
 	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
 		if (!nr_zone_taken[zid])
 			continue;
 
 		__update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]);
 	}
-#else
-	/* Zone ID does not matter on !HIGHMEM */
-	__update_lru_size(lruvec, lru, 0, -nr_taken);
-#endif
 
 #ifdef CONFIG_MEMCG
 	mem_cgroup_update_lru_size(lruvec, lru, -nr_taken);
@@ -1415,7 +1424,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	LIST_HEAD(pages_skipped);
 
 	for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
-					!list_empty(src); scan++) {
+					!list_empty(src);) {
 		struct page *page;
 
 		page = lru_to_page(src);
@@ -1429,6 +1438,12 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			continue;
 		}
 
+		/*
+		 * Account for scanned and skipped separately to avoid the pgdat
+		 * being prematurely marked unreclaimable by pgdat_reclaimable.
+		 */
+		scan++;
+
 		switch (__isolate_lru_page(page, mode)) {
 		case 0:
 			nr_pages = hpage_nr_pages(page);
@@ -1456,14 +1471,24 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	 */
 	if (!list_empty(&pages_skipped)) {
 		int zid;
+		unsigned long total_skipped = 0;
 
-		list_splice(&pages_skipped, src);
 		for (zid = 0; zid < MAX_NR_ZONES; zid++) {
 			if (!nr_skipped[zid])
 				continue;
 
 			__count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]);
+			total_skipped += nr_skipped[zid];
 		}
+
+		/*
+		 * Account skipped pages as a partial scan as the pgdat may be
+		 * close to unreclaimable. If the LRU list is empty, account
+		 * skipped pages as a full scan.
+		 */
+		scan += list_empty(src) ? total_skipped : total_skipped >> 2;
+
+		list_splice(&pages_skipped, src);
 	}
 	*nr_scanned = scan;
 	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan,
@@ -1627,6 +1652,30 @@ static int current_may_throttle(void)
 		bdi_write_congested(current->backing_dev_info);
 }
 
+static bool inactive_reclaimable_pages(struct lruvec *lruvec,
+				struct scan_control *sc, enum lru_list lru)
+{
+	int zid;
+	struct zone *zone;
+	int file = is_file_lru(lru);
+	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+
+	if (!global_reclaim(sc))
+		return true;
+
+	for (zid = sc->reclaim_idx; zid >= 0; zid--) {
+		zone = &pgdat->node_zones[zid];
+		if (!populated_zone(zone))
+			continue;
+
+		if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE +
+				LRU_FILE * file) >= SWAP_CLUSTER_MAX)
+			return true;
+	}
+
+	return false;
+}
+
 /*
  * shrink_inactive_list() is a helper for shrink_node().  It returns the number
  * of reclaimed pages
@@ -1649,6 +1698,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
 
+	if (!inactive_reclaimable_pages(lruvec, sc, lru))
+		return 0;
+
 	while (unlikely(too_many_isolated(pgdat, file, sc))) {
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
 
@@ -1955,12 +2007,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
  *    1TB     101        10GB
  *   10TB     320        32GB
  */
-static bool inactive_list_is_low(struct lruvec *lruvec, bool file)
+static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
+						struct scan_control *sc)
 {
 	unsigned long inactive_ratio;
 	unsigned long inactive;
 	unsigned long active;
 	unsigned long gb;
+	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+	int zid;
 
 	/*
 	 * If we don't have swap space, anonymous page deactivation
@@ -1972,6 +2027,27 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file)
 	inactive = lruvec_lru_size(lruvec, file * LRU_FILE);
 	active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE);
 
+	/*
+	 * For zone-constrained allocations, it is necessary to check if
+	 * deactivations are required for lowmem to be reclaimed. This
+	 * calculates the inactive/active pages available in eligible zones.
+	 */
+	for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) {
+		struct zone *zone = &pgdat->node_zones[zid];
+		unsigned long inactive_zone, active_zone;
+
+		if (!populated_zone(zone))
+			continue;
+
+		inactive_zone = zone_page_state(zone,
+				NR_ZONE_LRU_BASE + (file * LRU_FILE));
+		active_zone = zone_page_state(zone,
+				NR_ZONE_LRU_BASE + (file * LRU_FILE) + LRU_ACTIVE);
+
+		inactive -= min(inactive, inactive_zone);
+		active -= min(active, active_zone);
+	}
+
 	gb = (inactive + active) >> (30 - PAGE_SHIFT);
 	if (gb)
 		inactive_ratio = int_sqrt(10 * gb);
@@ -1985,7 +2061,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 				 struct lruvec *lruvec, struct scan_control *sc)
 {
 	if (is_active_lru(lru)) {
-		if (inactive_list_is_low(lruvec, is_file_lru(lru)))
+		if (inactive_list_is_low(lruvec, is_file_lru(lru), sc))
 			shrink_active_list(nr_to_scan, lruvec, sc, lru);
 		return 0;
 	}
@@ -2116,7 +2192,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 	 * lruvec even if it has plenty of old anonymous pages unless the
 	 * system is under heavy pressure.
 	 */
-	if (!inactive_list_is_low(lruvec, true) &&
+	if (!inactive_list_is_low(lruvec, true, sc) &&
 	    lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
 		scan_balance = SCAN_FILE;
 		goto out;
@@ -2358,7 +2434,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_list_is_low(lruvec, false))
+	if (inactive_list_is_low(lruvec, false, sc))
 		shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
 				   sc, LRU_ACTIVE_ANON);
 
@@ -2485,7 +2561,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 			shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
 			node_lru_pages += lru_pages;
 
-			if (!global_reclaim(sc))
+			if (memcg)
 				shrink_slab(sc->gfp_mask, pgdat->node_id,
 					    memcg, sc->nr_scanned - scanned,
 					    lru_pages);
@@ -2605,9 +2681,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 					sc->reclaim_idx, sc->nodemask) {
-		if (!populated_zone(zone))
-			continue;
-
 		/*
 		 * Take care memory controller reclaiming has small influence
 		 * to global LRU.
@@ -3014,7 +3087,7 @@ static void age_active_anon(struct pglist_data *pgdat,
 	do {
 		struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
 
-		if (inactive_list_is_low(lruvec, false))
+		if (inactive_list_is_low(lruvec, false, sc))
 			shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
 					   sc, LRU_ACTIVE_ANON);
 
@@ -3777,24 +3850,23 @@ int page_evictable(struct page *page)
 void check_move_unevictable_pages(struct page **pages, int nr_pages)
 {
 	struct lruvec *lruvec;
-	struct zone *zone = NULL;
+	struct pglist_data *pgdat = NULL;
 	int pgscanned = 0;
 	int pgrescued = 0;
 	int i;
 
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page = pages[i];
-		struct zone *pagezone;
+		struct pglist_data *pagepgdat = page_pgdat(page);
 
 		pgscanned++;
-		pagezone = page_zone(page);
-		if (pagezone != zone) {
-			if (zone)
-				spin_unlock_irq(zone_lru_lock(zone));
-			zone = pagezone;
-			spin_lock_irq(zone_lru_lock(zone));
+		if (pagepgdat != pgdat) {
+			if (pgdat)
+				spin_unlock_irq(&pgdat->lru_lock);
+			pgdat = pagepgdat;
+			spin_lock_irq(&pgdat->lru_lock);
 		}
-		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
+		lruvec = mem_cgroup_page_lruvec(page, pgdat);
 
 		if (!PageLRU(page) || !PageUnevictable(page))
 			continue;
@@ -3810,10 +3882,10 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
 		}
 	}
 
-	if (zone) {
+	if (pgdat) {
 		__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
 		__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
-		spin_unlock_irq(zone_lru_lock(zone));
+		spin_unlock_irq(&pgdat->lru_lock);
 	}
 }
 #endif /* CONFIG_SHMEM */
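Two standalone C sketches follow. They are not part of the patch above; the function and variable names in them are invented for illustration, and they only restate in userspace form the reasoning carried by the new comments in isolate_lru_pages() and inactive_list_is_low().

The first sketch mirrors the skipped-page accounting added to isolate_lru_pages(): pages skipped because they sit in an ineligible zone are charged as a quarter of a scan each, or as a full scan each once the LRU has been emptied, so pgdat_reclaimable() is not fooled into reporting forward progress that never happened.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: charge skipped pages against the scan count. */
static unsigned long account_skipped(unsigned long scan,
				     unsigned long total_skipped,
				     bool lru_is_empty)
{
	/* Empty LRU: every skipped page counts as a full scan. */
	if (lru_is_empty)
		return scan + total_skipped;

	/* Otherwise charge skipped pages at a quarter of a scan each. */
	return scan + (total_skipped >> 2);
}

int main(void)
{
	/* 10 pages really scanned, 32 skipped in ineligible zones. */
	printf("%lu\n", account_skipped(10, 32, false));	/* prints 18 */
	printf("%lu\n", account_skipped(10, 32, true));		/* prints 42 */
	return 0;
}

The quarter-scan charge is a compromise: counting every skip in full would let an LRU dominated by ineligible pages mark the node unreclaimable almost immediately, while ignoring skips entirely would hide the wasted work.

The second sketch restates the eligible-zone adjustment made in inactive_list_is_low(): LRU pages sitting in zones above sc->reclaim_idx are subtracted from the node-wide totals, so a lowmem-constrained allocation judges the inactive/active balance only against zones it is allowed to reclaim from. The zone array and the counts are made up for the example.

#include <stdio.h>

#define NR_ZONES 4	/* e.g. DMA, DMA32, NORMAL, MOVABLE */

struct zone_lru {
	unsigned long inactive;
	unsigned long active;
};

/* Illustrative only: discount LRU pages in zones the allocation cannot use. */
static void eligible_lru_size(const struct zone_lru *zones, int reclaim_idx,
			      unsigned long *inactive, unsigned long *active)
{
	int zid;

	for (zid = reclaim_idx + 1; zid < NR_ZONES; zid++) {
		*inactive -= zones[zid].inactive < *inactive ?
				zones[zid].inactive : *inactive;
		*active -= zones[zid].active < *active ?
				zones[zid].active : *active;
	}
}

int main(void)
{
	const struct zone_lru zones[NR_ZONES] = {
		{ 100, 50 }, { 1000, 400 }, { 8000, 3000 }, { 2000, 900 }
	};
	unsigned long inactive = 11100, active = 4350;	/* node-wide totals */

	/* Allocation constrained to zone index 2 (ZONE_NORMAL here). */
	eligible_lru_size(zones, 2, &inactive, &active);
	printf("inactive=%lu active=%lu\n", inactive, active);	/* 9100 3450 */
	return 0;
}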