memcg: avoid accounting special pages

author KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

Sun, 19 Oct 2008 03:28:10 +0000 (20:28 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 20 Oct 2008 15:52:38 +0000 (08:52 -0700)
author KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Sun, 19 Oct 2008 03:28:10 +0000 (20:28 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 20 Oct 2008 15:52:38 +0000 (08:52 -0700)
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt

index 9b53d5827361fd647f3388212e648f502defc698..1c07547d3f81f28a19edb9de8752f8865b44478d 100644 (file)
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -112,14 +112,22 @@ the per cgroup LRU.
  
  2.2.1 Accounting details
  
-All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted.
-RSS pages are accounted at the time of page_add_*_rmap() unless they've already
-been accounted for earlier. A file page will be accounted for as Page Cache;
-it's mapped into the page tables of a process, duplicate accounting is carefully
-avoided. Page Cache pages are accounted at the time of add_to_page_cache().
-The corresponding routines that remove a page from the page tables or removes
-a page from Page Cache is used to decrement the accounting counters of the
-cgroup.
+All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
+(Some pages which are never reclaimable and will not be on the global LRU
+ are not accounted. We only account pages under usual VM management.)
+
+RSS pages are accounted at page_fault unless they've already been accounted
+for earlier. A file page will be accounted for as Page Cache when it's
+inserted into inode (radix-tree). While it's mapped into the page tables of
+processes, duplicate accounting is carefully avoided.
+
+An RSS page is unaccounted when it's fully unmapped. A PageCache page is
+unaccounted when it's removed from the radix-tree.
+
+At page migration, accounting information is kept.
+
+Note: we only account pages on the LRU because our purpose is to control the
+amount of used pages. Pages not on the LRU tend to be out of the VM's control.
  
  2.3 Shared Page Accounting
  
diff --git a/mm/memory.c b/mm/memory.c

index 54cf20ee0a83c6a3b6af2041f756a6d0aa3ae20a..3a6c4a6583256584303c4ac7c8813938abc49ed0 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1323,18 +1323,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
         pte_t *pte;
         spinlock_t *ptl;
  
-       retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
-       if (retval)
-               goto out;
-
         retval = -EINVAL;
         if (PageAnon(page))
-               goto out_uncharge;
+               goto out;
         retval = -ENOMEM;
         flush_dcache_page(page);
         pte = get_locked_pte(mm, addr, &ptl);
         if (!pte)
-               goto out_uncharge;
+               goto out;
         retval = -EBUSY;
         if (!pte_none(*pte))
                 goto out_unlock;
@@ -1350,8 +1346,6 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
         return retval;
  out_unlock:
         pte_unmap_unlock(pte, ptl);
-out_uncharge:
-       mem_cgroup_uncharge_page(page);
  out:
         return retval;
  }
@@ -2463,6 +2457,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         struct page *page;
         pte_t entry;
         int anon = 0;
+       int charged = 0;
         struct page *dirty_page = NULL;
         struct vm_fault vmf;
         int ret;
@@ -2503,6 +2498,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                                 ret = VM_FAULT_OOM;
                                 goto out;
                         }
+                       if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+                               ret = VM_FAULT_OOM;
+                               page_cache_release(page);
+                               goto out;
+                       }
+                       charged = 1;
                         /*
                          * Don't let another task, with possibly unlocked vma,
                          * keep the mlocked page.
@@ -2543,11 +2544,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
  
         }
  
-       if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
-               ret = VM_FAULT_OOM;
-               goto out;
-       }
-
         page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
  
         /*
@@ -2585,7 +2581,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                 /* no need to invalidate: a not-present page won't be cached */
                 update_mmu_cache(vma, address, entry);
         } else {
-               mem_cgroup_uncharge_page(page);
+               if (charged)
+                       mem_cgroup_uncharge_page(page);
                 if (anon)
                         page_cache_release(page);
                 else
diff --git a/mm/rmap.c b/mm/rmap.c

index 7e90bebbeb6cfe630fa1a1aeb5498bf1e6cee11c..8701d5fce7327d5af01aba202b2bab2f8eabfc73 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -727,8 +727,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
                         page_clear_dirty(page);
                         set_page_dirty(page);
                 }
-
-               mem_cgroup_uncharge_page(page);
+               if (PageAnon(page))
+                       mem_cgroup_uncharge_page(page);
                 __dec_zone_page_state(page,
                         PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
                 /*
author	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
	Sun, 19 Oct 2008 03:28:10 +0000 (20:28 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 20 Oct 2008 15:52:38 +0000 (08:52 -0700)
Documentation/controllers/memory.txt		patch \| blob \| blame \| history
mm/memory.c		patch \| blob \| blame \| history
mm/rmap.c		patch \| blob \| blame \| history