mm/hugetlb: add size parameter to huge_pte_offset()
author: Punit Agrawal <punit.agrawal@arm.com>
Thu, 6 Jul 2017 22:39:42 +0000 (15:39 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Thu, 6 Jul 2017 23:24:34 +0000 (16:24 -0700)
A poisoned or migrated hugepage is stored as a swap entry in the page
tables.  On architectures that support hugepages consisting of
contiguous page table entries (such as arm64), this leads to ambiguity
in determining which page table entry to return from huge_pte_offset()
when a poisoned entry is encountered.

Let's remove the ambiguity by adding a size parameter to convey
additional information about the requested address.  Also fix up the
definitions and usages of huge_pte_offset() throughout the tree.

Link: http://lkml.kernel.org/r/20170522133604.11392-4-punit.agrawal@arm.com
Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Acked-by: Steve Capper <steve.capper@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: James Hogan <james.hogan@imgtec.com> (odd fixer:METAG ARCHITECTURE)
Cc: Ralf Baechle <ralf@linux-mips.org> (supporter:MIPS)
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
16 files changed:
arch/arm64/mm/hugetlbpage.c
arch/ia64/mm/hugetlbpage.c
arch/metag/mm/hugetlbpage.c
arch/mips/mm/hugetlbpage.c
arch/parisc/mm/hugetlbpage.c
arch/powerpc/mm/hugetlbpage.c
arch/s390/mm/hugetlbpage.c
arch/sh/mm/hugetlbpage.c
arch/sparc/mm/hugetlbpage.c
arch/tile/mm/hugetlbpage.c
arch/x86/mm/hugetlbpage.c
fs/userfaultfd.c
include/linux/hugetlb.h
mm/hugetlb.c
mm/page_vma_mapped.c
mm/pagewalk.c

index f89aa8fa5855f9ec71be6e3299a0e206c074e0a7..656e0ece228946de561456e212843f4a71473fe8 100644 (file)
@@ -131,7 +131,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
        return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+                      unsigned long addr, unsigned long sz)
 {
        pgd_t *pgd;
        pud_t *pud;
index 85de86d36fdf2a71783697212e2e5cdb1e2dbc46..ae35140332f70ba7f749ffc1fa095efacaf8b000 100644 (file)
@@ -44,7 +44,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 }
 
 pte_t *
-huge_pte_offset (struct mm_struct *mm, unsigned long addr)
+huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
        unsigned long taddr = htlbpage_to_page(addr);
        pgd_t *pgd;
@@ -92,7 +92,7 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int writ
        if (REGION_NUMBER(addr) != RGN_HPAGE)
                return ERR_PTR(-EINVAL);
 
-       ptep = huge_pte_offset(mm, addr);
+       ptep = huge_pte_offset(mm, addr, HPAGE_SIZE);
        if (!ptep || pte_none(*ptep))
                return NULL;
        page = pte_page(*ptep);
index db1b7da91e4f496b332335c2c69adf73c13fbe70..67fd53e2935a2fc2de72d96606344588998e0d9d 100644 (file)
@@ -74,7 +74,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
        return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+                      unsigned long addr, unsigned long sz)
 {
        pgd_t *pgd;
        pud_t *pud;
index 74aa6f62468f2eb463372c58320cc5e7c69cc2fe..cef152234312faecfbc0a1065214f98570294a08 100644 (file)
@@ -36,7 +36,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
        return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
+                      unsigned long sz)
 {
        pgd_t *pgd;
        pud_t *pud;
index aa50ac090e9b9d50648843cd2f85bae1ec7d045d..5eb8f633b282ea88d34e2eeb974d06ad59a02251 100644 (file)
@@ -69,7 +69,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
        return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+                      unsigned long addr, unsigned long sz)
 {
        pgd_t *pgd;
        pud_t *pud;
index 1816b965a1429291de883c0d1ddb52ab88b17ef1..c41dc44472c5cf1babd0e71177ac0d5615825645 100644 (file)
@@ -57,7 +57,7 @@ static unsigned nr_gpages;
 
 #define hugepd_none(hpd)       (hpd_val(hpd) == 0)
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
        /* Only called for hugetlbfs pages, hence can ignore THP */
        return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
index d3a5e39756f62549ce53b1a20c91df0cbc787be8..44a8e6f0391ec28af96b4ffb3bc50593174d8a69 100644 (file)
@@ -180,7 +180,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
        return (pte_t *) pmdp;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+                      unsigned long addr, unsigned long sz)
 {
        pgd_t *pgdp;
        p4d_t *p4dp;
index cc948db74878045bdcfeeed4c17d821098f0c3fa..d2412d2d64627e5a2db4ade3cc23af0dd95726c0 100644 (file)
@@ -42,7 +42,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
        return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+                      unsigned long addr, unsigned long sz)
 {
        pgd_t *pgd;
        pud_t *pud;
index 88855e383b34c16a8960b1ff89ae207a1ad50b07..28ee8d8ffa0771fa8e22a9be6882695cab873ad4 100644 (file)
@@ -277,7 +277,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
        return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+                      unsigned long addr, unsigned long sz)
 {
        pgd_t *pgd;
        pud_t *pud;
index 03e5cc4e76e4e67548c8ea396012d66d8ab89752..0986d426a413bcb292ffb3452a2d8f66b32f800d 100644 (file)
@@ -102,7 +102,8 @@ static pte_t *get_pte(pte_t *base, int index, int level)
        return ptep;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+                      unsigned long addr, unsigned long sz)
 {
        pgd_t *pgd;
        pud_t *pud;
index adad702b39cd473218d373e7d1b452b96d9816be..2824607df1081fe38a96d5e8af70e7c295e43ed5 100644 (file)
@@ -33,7 +33,7 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
        if (!vma || !is_vm_hugetlb_page(vma))
                return ERR_PTR(-EINVAL);
 
-       pte = huge_pte_offset(mm, address);
+       pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
 
        /* hugetlb should be locked, and hence, prefaulted */
        WARN_ON(!pte || pte_none(*pte));
index 3d0dd082337a6d9b5b3efd3d19b8c193b9772f40..cadcd12a3d35f6ba5deb54df94c995b19d3d34a8 100644 (file)
@@ -214,6 +214,7 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
  * hugepmd ranges.
  */
 static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
+                                        struct vm_area_struct *vma,
                                         unsigned long address,
                                         unsigned long flags,
                                         unsigned long reason)
@@ -224,7 +225,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
 
        VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-       pte = huge_pte_offset(mm, address);
+       pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
        if (!pte)
                goto out;
 
@@ -243,6 +244,7 @@ out:
 }
 #else
 static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
+                                        struct vm_area_struct *vma,
                                         unsigned long address,
                                         unsigned long flags,
                                         unsigned long reason)
@@ -448,7 +450,8 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
                must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
                                                  reason);
        else
-               must_wait = userfaultfd_huge_must_wait(ctx, vmf->address,
+               must_wait = userfaultfd_huge_must_wait(ctx, vmf->vma,
+                                                      vmf->address,
                                                       vmf->flags, reason);
        up_read(&mm->mmap_sem);
 
index c92a1f0c72400e56381eb9f588bb8043019cd69c..31e665fbcf762aac681b519ea60017b6f36602be 100644 (file)
@@ -137,7 +137,8 @@ extern struct list_head huge_boot_pages;
 
 pte_t *huge_pte_alloc(struct mm_struct *mm,
                        unsigned long addr, unsigned long sz);
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
+pte_t *huge_pte_offset(struct mm_struct *mm,
+                      unsigned long addr, unsigned long sz);
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
                              int write);
@@ -190,7 +191,7 @@ static inline void hugetlb_show_meminfo(void)
 #define hugetlb_fault(mm, vma, addr, flags)    ({ BUG(); 0; })
 #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
                                src_addr, pagep)        ({ BUG(); 0; })
-#define huge_pte_offset(mm, address)   0
+#define huge_pte_offset(mm, address, sz)       0
 static inline int dequeue_hwpoisoned_huge_page(struct page *page)
 {
        return 0;
index c73828e431004c4fe0e17b1c1a73cf3d0395c7de..0753455323966575f295565fc9ed05d4e74aacc9 100644 (file)
@@ -3246,7 +3246,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 
        for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
                spinlock_t *src_ptl, *dst_ptl;
-               src_pte = huge_pte_offset(src, addr);
+               src_pte = huge_pte_offset(src, addr, sz);
                if (!src_pte)
                        continue;
                dst_pte = huge_pte_alloc(dst, addr, sz);
@@ -3330,7 +3330,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
        address = start;
        for (; address < end; address += sz) {
-               ptep = huge_pte_offset(mm, address);
+               ptep = huge_pte_offset(mm, address, sz);
                if (!ptep)
                        continue;
 
@@ -3548,7 +3548,8 @@ retry_avoidcopy:
                        unmap_ref_private(mm, vma, old_page, address);
                        BUG_ON(huge_pte_none(pte));
                        spin_lock(ptl);
-                       ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+                       ptep = huge_pte_offset(mm, address & huge_page_mask(h),
+                                              huge_page_size(h));
                        if (likely(ptep &&
                                   pte_same(huge_ptep_get(ptep), pte)))
                                goto retry_avoidcopy;
@@ -3587,7 +3588,8 @@ retry_avoidcopy:
         * before the page tables are altered
         */
        spin_lock(ptl);
-       ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+       ptep = huge_pte_offset(mm, address & huge_page_mask(h),
+                              huge_page_size(h));
        if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
                ClearPagePrivate(new_page);
 
@@ -3874,7 +3876,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
        address &= huge_page_mask(h);
 
-       ptep = huge_pte_offset(mm, address);
+       ptep = huge_pte_offset(mm, address, huge_page_size(h));
        if (ptep) {
                entry = huge_ptep_get(ptep);
                if (unlikely(is_hugetlb_entry_migration(entry))) {
@@ -4131,7 +4133,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 *
                 * Note that page table lock is not held when pte is null.
                 */
-               pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+               pte = huge_pte_offset(mm, vaddr & huge_page_mask(h),
+                                     huge_page_size(h));
                if (pte)
                        ptl = huge_pte_lock(h, mm, pte);
                absent = !pte || huge_pte_none(huge_ptep_get(pte));
@@ -4270,7 +4273,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
        i_mmap_lock_write(vma->vm_file->f_mapping);
        for (; address < end; address += huge_page_size(h)) {
                spinlock_t *ptl;
-               ptep = huge_pte_offset(mm, address);
+               ptep = huge_pte_offset(mm, address, huge_page_size(h));
                if (!ptep)
                        continue;
                ptl = huge_pte_lock(h, mm, ptep);
@@ -4534,7 +4537,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 
                saddr = page_table_shareable(svma, vma, addr, idx);
                if (saddr) {
-                       spte = huge_pte_offset(svma->vm_mm, saddr);
+                       spte = huge_pte_offset(svma->vm_mm, saddr,
+                                              vma_mmu_pagesize(svma));
                        if (spte) {
                                get_page(virt_to_page(spte));
                                break;
@@ -4630,7 +4634,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
        return pte;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm,
+                      unsigned long addr, unsigned long sz)
 {
        pgd_t *pgd;
        p4d_t *p4d;
index de9c40d7304aa0e714bdd32abe79517ec3d73038..8ec6ba230bb9dabba967489f0ba8f15ca61b8806 100644 (file)
@@ -116,7 +116,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 
        if (unlikely(PageHuge(pvmw->page))) {
                /* when pud is not present, pte will be NULL */
-               pvmw->pte = huge_pte_offset(mm, pvmw->address);
+               pvmw->pte = huge_pte_offset(mm, pvmw->address,
+                                           PAGE_SIZE << compound_order(page));
                if (!pvmw->pte)
                        return false;
 
index 60f7856e508fb90e6010feadad2233f4d148341e..1a41979654150f05514cd25f0da685fe397f1104 100644 (file)
@@ -180,12 +180,13 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
        struct hstate *h = hstate_vma(vma);
        unsigned long next;
        unsigned long hmask = huge_page_mask(h);
+       unsigned long sz = huge_page_size(h);
        pte_t *pte;
        int err = 0;
 
        do {
                next = hugetlb_entry_end(h, addr, end);
-               pte = huge_pte_offset(walk->mm, addr & hmask);
+               pte = huge_pte_offset(walk->mm, addr & hmask, sz);
                if (pte && walk->hugetlb_entry)
                        err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
                if (err)