static DECLARE_RWSEM(shrinker_rwsem);
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-#define scan_global_lru(sc) (!(sc)->mem_cgroup)
+/* Nonzero when sc->mem_cgroup is unset; always 1 without the memcg config. */
+#define scanning_global_lru(sc) (!(sc)->mem_cgroup)
#else
-#define scan_global_lru(sc) (1)
+#define scanning_global_lru(sc) (1)
#endif
+/*
+ * Pick the reclaim statistics to use for this scan: the ones returned by
+ * mem_cgroup_get_reclaim_stat() for sc->mem_cgroup when sc is not scanning
+ * the global LRU, otherwise the zone's own reclaim_stat.
+ */
+static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
+ struct scan_control *sc)
+{
+ if (!scanning_global_lru(sc))
+ return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
+
+ return &zone->reclaim_stat;
+}
+
+/*
+ * Number of pages on LRU list @lru for this scan: asks
+ * mem_cgroup_zone_nr_pages() for sc->mem_cgroup when sc is not scanning
+ * the global LRU, otherwise reads the zone counter NR_LRU_BASE + lru.
+ */
+static unsigned long zone_nr_pages(struct zone *zone, struct scan_control *sc,
+ enum lru_list lru)
+{
+ if (!scanning_global_lru(sc))
+ return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);
+
+ return zone_page_state(zone, NR_LRU_BASE + lru);
+}
+
+
/*
* Add a shrinker callback to be called from the vm
*/
lru = LRU_UNEVICTABLE;
add_page_to_unevictable_list(page);
}
- mem_cgroup_move_lists(page, lru);
/*
* page's status can change while we move it among lru. If an evictable
lru = !!TestClearPageActive(page) + page_is_file_cache(page);
lru_cache_add_lru(page, lru);
- mem_cgroup_move_lists(page, lru);
put_page(page);
}
#endif /* CONFIG_UNEVICTABLE_LRU */
return ret;
ret = -EBUSY;
+
if (likely(get_page_unless_zero(page))) {
/*
* Be careful not to clear PageLRU until after we're
*/
ClearPageLRU(page);
ret = 0;
+ mem_cgroup_del_lru(page);
}
return ret;
struct pagevec pvec;
unsigned long nr_scanned = 0;
unsigned long nr_reclaimed = 0;
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
pagevec_init(&pvec, 1);
__mod_zone_page_state(zone, NR_INACTIVE_ANON,
-count[LRU_INACTIVE_ANON]);
- if (scan_global_lru(sc)) {
+ if (scanning_global_lru(sc))
zone->pages_scanned += nr_scan;
- zone->recent_scanned[0] += count[LRU_INACTIVE_ANON];
- zone->recent_scanned[0] += count[LRU_ACTIVE_ANON];
- zone->recent_scanned[1] += count[LRU_INACTIVE_FILE];
- zone->recent_scanned[1] += count[LRU_ACTIVE_FILE];
- }
+
+ reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
+ reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
+ reclaim_stat->recent_scanned[1] += count[LRU_INACTIVE_FILE];
+ reclaim_stat->recent_scanned[1] += count[LRU_ACTIVE_FILE];
+
spin_unlock_irq(&zone->lru_lock);
nr_scanned += nr_scan;
if (current_is_kswapd()) {
__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
__count_vm_events(KSWAPD_STEAL, nr_freed);
- } else if (scan_global_lru(sc))
+ } else if (scanning_global_lru(sc))
__count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
__count_zone_vm_events(PGSTEAL, zone, nr_freed);
SetPageLRU(page);
lru = page_lru(page);
add_page_to_lru_list(zone, page, lru);
- mem_cgroup_move_lists(page, lru);
- if (PageActive(page) && scan_global_lru(sc)) {
+ if (PageActive(page)) {
int file = !!page_is_file_cache(page);
- zone->recent_rotated[file]++;
+ reclaim_stat->recent_rotated[file]++;
}
if (!pagevec_add(&pvec, page)) {
spin_unlock_irq(&zone->lru_lock);
struct page *page;
struct pagevec pvec;
enum lru_list lru;
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
lru_add_drain();
spin_lock_irq(&zone->lru_lock);
* zone->pages_scanned is used for detect zone's oom
* mem_cgroup remembers nr_scan by itself.
*/
- if (scan_global_lru(sc)) {
+ if (scanning_global_lru(sc)) {
zone->pages_scanned += pgscanned;
- zone->recent_scanned[!!file] += pgmoved;
}
+ reclaim_stat->recent_scanned[!!file] += pgmoved;
if (file)
__mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
* This helps balance scan pressure between file and anonymous
* pages in get_scan_ratio.
*/
- if (scan_global_lru(sc))
- zone->recent_rotated[!!file] += pgmoved;
+ reclaim_stat->recent_rotated[!!file] += pgmoved;
while (!list_empty(&l_inactive)) {
page = lru_to_page(&l_inactive);
ClearPageActive(page);
list_move(&page->lru, &zone->lru[lru].list);
- mem_cgroup_move_lists(page, lru);
+ mem_cgroup_add_lru_list(page, lru);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
pagevec_release(&pvec);
}
+/*
+ * Zone-wide variant of the inactive-anon check: returns 1 when the zone's
+ * NR_INACTIVE_ANON count multiplied by zone->inactive_ratio is still below
+ * its NR_ACTIVE_ANON count, 0 otherwise.
+ */
+static int inactive_anon_is_low_global(struct zone *zone)
+{
+ unsigned long active, inactive;
+
+ active = zone_page_state(zone, NR_ACTIVE_ANON);
+ inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+
+ if (inactive * zone->inactive_ratio < active)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * inactive_anon_is_low - check if anonymous pages need to be deactivated
+ * @zone: zone to check
+ * @sc: scan control of this context
+ *
+ * Returns true if there are not enough inactive anon pages, meaning some
+ * active anon pages need to be deactivated.
+ *
+ * When @sc is scanning the global LRU the answer comes from
+ * inactive_anon_is_low_global() on @zone; otherwise it is delegated to
+ * mem_cgroup_inactive_anon_is_low() on @sc->mem_cgroup and @zone is not
+ * consulted here.
+ */
+static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
+{
+ int low;
+
+ if (scanning_global_lru(sc))
+ low = inactive_anon_is_low_global(zone);
+ else
+ low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
+ return low;
+}
+
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
struct zone *zone, struct scan_control *sc, int priority)
{
return 0;
}
- if (lru == LRU_ACTIVE_ANON &&
- (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
+ if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) {
shrink_active_list(nr_to_scan, zone, sc, priority, file);
return 0;
}
unsigned long anon, file, free;
unsigned long anon_prio, file_prio;
unsigned long ap, fp;
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
/* If we have no swap space, do not bother scanning anon pages. */
if (nr_swap_pages <= 0) {
return;
}
- anon = zone_page_state(zone, NR_ACTIVE_ANON) +
- zone_page_state(zone, NR_INACTIVE_ANON);
- file = zone_page_state(zone, NR_ACTIVE_FILE) +
- zone_page_state(zone, NR_INACTIVE_FILE);
- free = zone_page_state(zone, NR_FREE_PAGES);
-
- /* If we have very few page cache pages, force-scan anon pages. */
- if (unlikely(file + free <= zone->pages_high)) {
- percent[0] = 100;
- percent[1] = 0;
- return;
+ anon = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) +
+ zone_nr_pages(zone, sc, LRU_INACTIVE_ANON);
+ file = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) +
+ zone_nr_pages(zone, sc, LRU_INACTIVE_FILE);
+
+ if (scanning_global_lru(sc)) {
+ free = zone_page_state(zone, NR_FREE_PAGES);
+ /* If we have very few page cache pages,
+ force-scan anon pages. */
+ if (unlikely(file + free <= zone->pages_high)) {
+ percent[0] = 100;
+ percent[1] = 0;
+ return;
+ }
}
/*
*
* anon in [0], file in [1]
*/
- if (unlikely(zone->recent_scanned[0] > anon / 4)) {
+ if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
spin_lock_irq(&zone->lru_lock);
- zone->recent_scanned[0] /= 2;
- zone->recent_rotated[0] /= 2;
+ reclaim_stat->recent_scanned[0] /= 2;
+ reclaim_stat->recent_rotated[0] /= 2;
spin_unlock_irq(&zone->lru_lock);
}
- if (unlikely(zone->recent_scanned[1] > file / 4)) {
+ if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
spin_lock_irq(&zone->lru_lock);
- zone->recent_scanned[1] /= 2;
- zone->recent_rotated[1] /= 2;
+ reclaim_stat->recent_scanned[1] /= 2;
+ reclaim_stat->recent_rotated[1] /= 2;
spin_unlock_irq(&zone->lru_lock);
}
* proportional to the fraction of recently scanned pages on
* each list that were recently referenced and in active use.
*/
- ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1);
- ap /= zone->recent_rotated[0] + 1;
+ ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
+ ap /= reclaim_stat->recent_rotated[0] + 1;
- fp = (file_prio + 1) * (zone->recent_scanned[1] + 1);
- fp /= zone->recent_rotated[1] + 1;
+ fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
+ fp /= reclaim_stat->recent_rotated[1] + 1;
/* Normalize to percentages */
percent[0] = 100 * ap / (ap + fp + 1);
get_scan_ratio(zone, sc, percent);
for_each_evictable_lru(l) {
- if (scan_global_lru(sc)) {
- int file = is_file_lru(l);
- int scan;
-
- scan = zone_page_state(zone, NR_LRU_BASE + l);
- if (priority) {
- scan >>= priority;
- scan = (scan * percent[file]) / 100;
- }
+ int file = is_file_lru(l);
+ int scan;
+
+ scan = zone_page_state(zone, NR_LRU_BASE + l);
+ if (priority) {
+ scan >>= priority;
+ scan = (scan * percent[file]) / 100;
+ }
+ if (scanning_global_lru(sc)) {
zone->lru[l].nr_scan += scan;
nr[l] = zone->lru[l].nr_scan;
if (nr[l] >= swap_cluster_max)
zone->lru[l].nr_scan = 0;
else
nr[l] = 0;
- } else {
- /*
- * This reclaim occurs not because zone memory shortage
- * but because memory controller hits its limit.
- * Don't modify zone reclaim related data.
- */
- nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone,
- priority, l);
- }
+ } else
+ nr[l] = scan;
}
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
- shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
- else if (!scan_global_lru(sc))
+ if (inactive_anon_is_low(zone, sc))
shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
throttle_vm_writeout(sc->gfp_mask);
* Take care memory controller reclaiming has small influence
* to global LRU.
*/
- if (scan_global_lru(sc)) {
+ if (scanning_global_lru(sc)) {
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue;
note_zone_scanning_priority(zone, priority);
delayacct_freepages_start();
- if (scan_global_lru(sc))
+ if (scanning_global_lru(sc))
count_vm_event(ALLOCSTALL);
/*
* mem_cgroup will not do shrink_slab.
*/
- if (scan_global_lru(sc)) {
+ if (scanning_global_lru(sc)) {
for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
* Don't shrink slabs when reclaiming memory from
* over limit cgroups
*/
- if (scan_global_lru(sc)) {
+ if (scanning_global_lru(sc)) {
shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
if (reclaim_state) {
sc->nr_reclaimed += reclaim_state->reclaimed_slab;
congestion_wait(WRITE, HZ/10);
}
/* top priority shrink_zones still had more to do? don't OOM, then */
- if (!sc->all_unreclaimable && scan_global_lru(sc))
+ if (!sc->all_unreclaimable && scanning_global_lru(sc))
ret = sc->nr_reclaimed;
out:
/*
if (priority < 0)
priority = 0;
- if (scan_global_lru(sc)) {
+ if (scanning_global_lru(sc)) {
for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
- gfp_t gfp_mask)
+ gfp_t gfp_mask,
+ bool noswap,
+ unsigned int swappiness)
{
struct scan_control sc = {
.may_writepage = !laptop_mode,
.may_swap = 1,
.swap_cluster_max = SWAP_CLUSTER_MAX,
- .swappiness = vm_swappiness,
+ .swappiness = swappiness,
.order = 0,
.mem_cgroup = mem_cont,
.isolate_pages = mem_cgroup_isolate_pages,
};
struct zonelist *zonelist;
+ if (noswap)
+ sc.may_swap = 0;
+
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
zonelist = NODE_DATA(numa_node_id())->node_zonelists;
* Do some background aging of the anon list, to give
* pages a chance to be referenced before reclaiming.
*/
- if (inactive_anon_is_low(zone))
+ if (inactive_anon_is_low(zone, &sc))
shrink_active_list(SWAP_CLUSTER_MAX, zone,
&sc, priority, 0);
try_to_freeze();
+ /*
+ * Fragmentation may mean that the system cannot be
+ * rebalanced for high-order allocations in all zones.
+ * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX,
+ * it means the zones have been fully scanned and are still
+ * not balanced. For high-order allocations, there is
+ * little point trying all over again as kswapd may
+ * loop forever.
+ *
+ * Instead, recheck all watermarks at order-0 as they
+ * are the most important. If watermarks are ok, kswapd will go
+ * back to sleep. High-order users can still perform direct
+ * reclaim if they wish.
+ */
+ if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
+ order = sc.order = 0;
+
goto loop_again;
}
__dec_zone_state(zone, NR_UNEVICTABLE);
list_move(&page->lru, &zone->lru[l].list);
+ mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
__inc_zone_state(zone, NR_INACTIVE_ANON + l);
__count_vm_event(UNEVICTABLE_PGRESCUED);
} else {
*/
SetPageUnevictable(page);
list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
+ mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
if (page_evictable(page, NULL))
goto retry;
}