mm: make PR_SET_THP_DISABLE immediately active
author Michal Hocko <mhocko@suse.com>
Mon, 10 Jul 2017 22:48:02 +0000 (15:48 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 10 Jul 2017 23:32:31 +0000 (16:32 -0700)
PR_SET_THP_DISABLE has rather subtle semantics.  It doesn't affect any
existing mapping because it only updates mm->def_flags, which is a
template for new mappings.

The mappings created after prctl(PR_SET_THP_DISABLE) have VM_NOHUGEPAGE
flag set.  This can be quite surprising for all those applications which
do not do prctl(); fork() & exec() and want to control their own THP
behavior.

Another use case where the immediate semantics of the prctl might be useful
is a combination of pre- and post-copy migration of containers with
CRIU.  In this case CRIU populates a part of a memory region with data
that was saved during the pre-copy stage.  Afterwards, the region is
registered with userfaultfd and CRIU expects to get page faults for the
parts of the region that were not yet populated.  However, khugepaged
collapses the pages and the expected page faults do not occur.

In the more general case, the prctl(PR_SET_THP_DISABLE) could be used as a
temporary mechanism for enabling/disabling THP process wide.

Implementation wise, a new MMF_DISABLE_THP flag is added.  This flag is
tested when the decision whether to use huge pages is taken, either during
a page fault or at the time of THP collapse.

It should be noted that the new implementation makes PR_SET_THP_DISABLE
a master override of any per-VMA setting, which was not the case
previously.

Fixes: a0715cc22601 ("mm, thp: add VM_INIT_DEF_MASK and PRCTL_THP_DISABLE")
Link: http://lkml.kernel.org/r/1496415802-30944-1-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/huge_mm.h
include/linux/khugepaged.h
include/linux/sched/coredump.h
kernel/sys.c
mm/khugepaged.c
mm/shmem.c

index d3b3e8fcc717d517249d73cd587541f6acee1548..40d7b7dd2653f63b5491ef4fae41349105f3966d 100644 (file)
@@ -92,6 +92,7 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
           (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) &&                   \
           ((__vma)->vm_flags & VM_HUGEPAGE))) &&                       \
         !((__vma)->vm_flags & VM_NOHUGEPAGE) &&                        \
+        !test_bit(MMF_DISABLE_THP, &(__vma)->vm_mm->flags) &&          \
         !is_vma_temporary_stack(__vma))
 #define transparent_hugepage_use_zero_page()                           \
        (transparent_hugepage_flags &                                   \
index 5d9a400af5091f297abec57050b5aa2b2ced11aa..f0d7335336cd6ed00bd2759e4c6fb9a5f5b38671 100644 (file)
@@ -48,7 +48,8 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
        if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
                if ((khugepaged_always() ||
                     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
-                   !(vm_flags & VM_NOHUGEPAGE))
+                   !(vm_flags & VM_NOHUGEPAGE) &&
+                   !test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
                        if (__khugepaged_enter(vma->vm_mm))
                                return -ENOMEM;
        return 0;
index 69eedcef8f03fbc7553e06002f486a2e06085b45..98ae0d05aa32e4da15edcb532d7d5d86695e8081 100644 (file)
@@ -68,7 +68,10 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_OOM_SKIP           21      /* mm is of no interest for the OOM killer */
 #define MMF_UNSTABLE           22      /* mm is unstable for copy_from_user */
 #define MMF_HUGE_ZERO_PAGE     23      /* mm has ever used the global huge zero page */
+#define MMF_DISABLE_THP                24      /* disable THP for all VMAs */
+#define MMF_DISABLE_THP_MASK   (1 << MMF_DISABLE_THP)
 
-#define MMF_INIT_MASK          (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
+#define MMF_INIT_MASK          (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
+                                MMF_DISABLE_THP_MASK)
 
 #endif /* _LINUX_SCHED_COREDUMP_H */
index 47d901586b4e0ac5185ba256a720098ad11c96f7..73fc0af147d02699cae7b1317816776e81b38af1 100644 (file)
@@ -2360,7 +2360,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
        case PR_GET_THP_DISABLE:
                if (arg2 || arg3 || arg4 || arg5)
                        return -EINVAL;
-               error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
+               error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags);
                break;
        case PR_SET_THP_DISABLE:
                if (arg3 || arg4 || arg5)
@@ -2368,9 +2368,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                if (down_write_killable(&me->mm->mmap_sem))
                        return -EINTR;
                if (arg2)
-                       me->mm->def_flags |= VM_NOHUGEPAGE;
+                       set_bit(MMF_DISABLE_THP, &me->mm->flags);
                else
-                       me->mm->def_flags &= ~VM_NOHUGEPAGE;
+                       clear_bit(MMF_DISABLE_THP, &me->mm->flags);
                up_write(&me->mm->mmap_sem);
                break;
        case PR_MPX_ENABLE_MANAGEMENT:
index df4ebdb2b10a373723330dc0124957cd2cb1c021..c01f177a1120a4802fedc63ffc1d9665ea09ec0b 100644 (file)
@@ -816,7 +816,8 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
 static bool hugepage_vma_check(struct vm_area_struct *vma)
 {
        if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
-           (vma->vm_flags & VM_NOHUGEPAGE))
+           (vma->vm_flags & VM_NOHUGEPAGE) ||
+           test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
                return false;
        if (shmem_file(vma->vm_file)) {
                if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
index 9418f5a9bc46891df40335a74b809c26e5e94928..b0aa6075d164df9ae4766876cc823394abaebc6d 100644 (file)
@@ -1977,10 +1977,12 @@ static int shmem_fault(struct vm_fault *vmf)
        }
 
        sgp = SGP_CACHE;
-       if (vma->vm_flags & VM_HUGEPAGE)
-               sgp = SGP_HUGE;
-       else if (vma->vm_flags & VM_NOHUGEPAGE)
+
+       if ((vma->vm_flags & VM_NOHUGEPAGE) ||
+           test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
                sgp = SGP_NOHUGE;
+       else if (vma->vm_flags & VM_HUGEPAGE)
+               sgp = SGP_HUGE;
 
        error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
                                  gfp, vma, vmf, &ret);