x86/vsyscall/64: Explicitly set _PAGE_USER in the pagetable hierarchy
authorAndy Lutomirski <luto@kernel.org>
Mon, 11 Dec 2017 06:47:19 +0000 (22:47 -0800)
committerIngo Molnar <mingo@kernel.org>
Fri, 22 Dec 2017 19:13:01 +0000 (20:13 +0100)
The kernel is very erratic as to which pagetables have _PAGE_USER set.  The
vsyscall page gets lucky: it seems that all of the relevant pagetables are
among the apparently arbitrary ones that set _PAGE_USER.  Rather than
relying on chance, just explicitly set _PAGE_USER.

This will let us clean up pagetable setup to stop setting _PAGE_USER.  The
added code can also be reused by pagetable isolation to manage the
_PAGE_USER bit in the usermode tables.

[ tglx: Folded paravirt fix from Juergen Gross ]

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/entry/vsyscall/vsyscall_64.c

index f279ba2643dc8933b9659242082e7ef2ea2d9dd6..daad57c76e4263d1a244414392de1bf8a4e4016f 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
 #include <asm/traps.h>
+#include <asm/paravirt.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
@@ -329,16 +330,47 @@ int in_gate_area_no_mm(unsigned long addr)
        return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present.  If so, we skip calling
+ * this.
+ */
+static void __init set_vsyscall_pgtable_user_bits(void)
+{
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       pgd = pgd_offset_k(VSYSCALL_ADDR);
+       set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+       p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+       p4d->p4d |= _PAGE_USER;
+#endif
+       pud = pud_offset(p4d, VSYSCALL_ADDR);
+       set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
+       pmd = pmd_offset(pud, VSYSCALL_ADDR);
+       set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
+}
+
 void __init map_vsyscall(void)
 {
        extern char __vsyscall_page;
        unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-       if (vsyscall_mode != NONE)
+       if (vsyscall_mode != NONE) {
                __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
                             vsyscall_mode == NATIVE
                             ? PAGE_KERNEL_VSYSCALL
                             : PAGE_KERNEL_VVAR);
+               set_vsyscall_pgtable_user_bits();
+       }
 
        BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
                     (unsigned long)VSYSCALL_ADDR);