Merge git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile
[linux-2.6-block.git] / mm / hugetlb.c
index 499cb72c74b1d4c70dfb05610c0fe1248a15f166..271e4432734c376baf0bf4b8953a38e391ac011c 100644 (file)
@@ -61,6 +61,9 @@ DEFINE_SPINLOCK(hugetlb_lock);
 static int num_fault_mutexes;
 static struct mutex *htlb_fault_mutex_table ____cacheline_aligned_in_smp;
 
+/* Forward declaration */
+static int hugetlb_acct_memory(struct hstate *h, long delta);
+
 static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
 {
        bool free = (spool->count == 0) && (spool->used_hpages == 0);
@@ -68,12 +71,18 @@ static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
        spin_unlock(&spool->lock);
 
        /* If no pages are used, and no other handles to the subpool
-        * remain, free the subpool the subpool remain */
-       if (free)
+        * remain, give up any reservations based on minimum size and
+        * free the subpool */
+       if (free) {
+               if (spool->min_hpages != -1)
+                       hugetlb_acct_memory(spool->hstate,
+                                               -spool->min_hpages);
                kfree(spool);
+       }
 }
 
-struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
+struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
+                                               long min_hpages)
 {
        struct hugepage_subpool *spool;
 
@@ -83,7 +92,15 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
 
        spin_lock_init(&spool->lock);
        spool->count = 1;
-       spool->max_hpages = nr_blocks;
+       spool->max_hpages = max_hpages;
+       spool->hstate = h;
+       spool->min_hpages = min_hpages;
+
+       if (min_hpages != -1 && hugetlb_acct_memory(h, min_hpages)) {
+               kfree(spool);
+               return NULL;
+       }
+       spool->rsv_hpages = min_hpages;
 
        return spool;
 }
@@ -907,6 +924,31 @@ struct hstate *size_to_hstate(unsigned long size)
        return NULL;
 }
 
+/*
+ * Test to determine whether the hugepage is "active/in-use" (i.e. being linked
+ * to hstate->hugepage_activelist.)
+ *
+ * This function can be called for tail pages, but never returns true for them.
+ */
+bool page_huge_active(struct page *page)
+{
+       VM_BUG_ON_PAGE(!PageHuge(page), page);
+       return PageHead(page) && PagePrivate(&page[1]);
+}
+
+/* never called for tail page */
+static void set_page_huge_active(struct page *page)
+{
+       VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
+       SetPagePrivate(&page[1]);
+}
+
+static void clear_page_huge_active(struct page *page)
+{
+       VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
+       ClearPagePrivate(&page[1]);
+}
+
 void free_huge_page(struct page *page)
 {
        /*
@@ -935,6 +977,7 @@ void free_huge_page(struct page *page)
                restore_reserve = true;
 
        spin_lock(&hugetlb_lock);
+       clear_page_huge_active(page);
        hugetlb_cgroup_uncharge_page(hstate_index(h),
                                     pages_per_huge_page(h), page);
        if (restore_reserve)
@@ -2955,6 +2998,7 @@ retry_avoidcopy:
        copy_user_huge_page(new_page, old_page, address, vma,
                            pages_per_huge_page(h));
        __SetPageUptodate(new_page);
+       set_page_huge_active(new_page);
 
        mmun_start = address & huge_page_mask(h);
        mmun_end = mmun_start + huge_page_size(h);
@@ -3067,6 +3111,7 @@ retry:
                }
                clear_huge_page(page, address, pages_per_huge_page(h));
                __SetPageUptodate(page);
+               set_page_huge_active(page);
 
                if (vma->vm_flags & VM_MAYSHARE) {
                        int err;
@@ -3851,20 +3896,6 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
 
 #ifdef CONFIG_MEMORY_FAILURE
 
-/* Should be called in hugetlb_lock */
-static int is_hugepage_on_freelist(struct page *hpage)
-{
-       struct page *page;
-       struct page *tmp;
-       struct hstate *h = page_hstate(hpage);
-       int nid = page_to_nid(hpage);
-
-       list_for_each_entry_safe(page, tmp, &h->hugepage_freelists[nid], lru)
-               if (page == hpage)
-                       return 1;
-       return 0;
-}
-
 /*
  * This function is called from memory failure code.
  * Assume the caller holds page lock of the head page.
@@ -3876,7 +3907,11 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
        int ret = -EBUSY;
 
        spin_lock(&hugetlb_lock);
-       if (is_hugepage_on_freelist(hpage)) {
+       /*
+        * Just checking !page_huge_active is not enough, because that could be
+        * an isolated/hwpoisoned hugepage (which has >0 refcount).
+        */
+       if (!page_huge_active(hpage) && !page_count(hpage)) {
                /*
                 * Hwpoisoned hugepage isn't linked to activelist or freelist,
                 * but dangling hpage->lru can trigger list-debug warnings
@@ -3896,42 +3931,27 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
 
 bool isolate_huge_page(struct page *page, struct list_head *list)
 {
+       bool ret = true;
+
        VM_BUG_ON_PAGE(!PageHead(page), page);
-       if (!get_page_unless_zero(page))
-               return false;
        spin_lock(&hugetlb_lock);
+       if (!page_huge_active(page) || !get_page_unless_zero(page)) {
+               ret = false;
+               goto unlock;
+       }
+       clear_page_huge_active(page);
        list_move_tail(&page->lru, list);
+unlock:
        spin_unlock(&hugetlb_lock);
-       return true;
+       return ret;
 }
 
 void putback_active_hugepage(struct page *page)
 {
        VM_BUG_ON_PAGE(!PageHead(page), page);
        spin_lock(&hugetlb_lock);
+       set_page_huge_active(page);
        list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
        spin_unlock(&hugetlb_lock);
        put_page(page);
 }
-
-bool is_hugepage_active(struct page *page)
-{
-       VM_BUG_ON_PAGE(!PageHuge(page), page);
-       /*
-        * This function can be called for a tail page because the caller,
-        * scan_movable_pages, scans through a given pfn-range which typically
-        * covers one memory block. In systems using gigantic hugepage (1GB
-        * for x86_64,) a hugepage is larger than a memory block, and we don't
-        * support migrating such large hugepages for now, so return false
-        * when called for tail pages.
-        */
-       if (PageTail(page))
-               return false;
-       /*
-        * Refcount of a hwpoisoned hugepages is 1, but they are not active,
-        * so we should return false for them.
-        */
-       if (unlikely(PageHWPoison(page)))
-               return false;
-       return page_count(page) > 0;
-}