x86/vsyscall: Add a new vsyscall=xonly mode
author Andy Lutomirski <luto@kernel.org>
Thu, 27 Jun 2019 04:45:03 +0000 (21:45 -0700)
committer Thomas Gleixner <tglx@linutronix.de>
Thu, 27 Jun 2019 22:04:38 +0000 (00:04 +0200)
With vsyscall emulation on, the kernel still exposes a readable vsyscall page
containing syscall instructions that validly implement the vsyscalls.

This is required because certain dynamic binary instrumentation tools
attempt to read the call targets of call instructions in the instrumented
code.  If the instrumented code uses vsyscalls, then the vsyscall page needs
to contain readable code.

Unfortunately, leaving readable memory at a deterministic address can be
used to help various ASLR bypasses, so some hardening value can be gained
by disallowing vsyscall reads.

Given how rarely the vsyscall page needs to be readable, add a mechanism to
make the vsyscall page execute-only.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Kernel Hardening <kernel-hardening@lists.openwall.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/r/d17655777c21bc09a7af1bbcf74e6f2b69a51152.1561610354.git.luto@kernel.org
Documentation/admin-guide/kernel-parameters.txt
arch/x86/Kconfig
arch/x86/entry/vsyscall/vsyscall_64.c

index 0082d1e56999ec6d2ef0aec85476799397cedf0e..be8c3a680afa26319f2109191f423f4151769afd 100644 (file)
                        targets for exploits that can control RIP.
 
                        emulate     [default] Vsyscalls turn into traps and are
-                                   emulated reasonably safely.
+                                   emulated reasonably safely.  The vsyscall
+                                   page is readable.
+
+                       xonly       Vsyscalls turn into traps and are
+                                   emulated reasonably safely.  The vsyscall
+                                   page is not readable.
 
                        none        Vsyscalls don't work at all.  This makes
                                    them quite hard to use for exploits but
index 2bbbd4d1ba31de5c0431393247ac4279878eb13d..0182d2c6759063dd9af8a5a2ae261e4da8234c4a 100644 (file)
@@ -2293,23 +2293,38 @@ choice
          it can be used to assist security vulnerability exploitation.
 
          This setting can be changed at boot time via the kernel command
-         line parameter vsyscall=[emulate|none].
+         line parameter vsyscall=[emulate|xonly|none].
 
          On a system with recent enough glibc (2.14 or newer) and no
          static binaries, you can say None without a performance penalty
          to improve security.
 
-         If unsure, select "Emulate".
+         If unsure, select "Emulate execution only".
 
        config LEGACY_VSYSCALL_EMULATE
-               bool "Emulate"
+               bool "Full emulation"
                help
-                 The kernel traps and emulates calls into the fixed
-                 vsyscall address mapping. This makes the mapping
-                 non-executable, but it still contains known contents,
-                 which could be used in certain rare security vulnerability
-                 exploits. This configuration is recommended when userspace
-                 still uses the vsyscall area.
+                 The kernel traps and emulates calls into the fixed vsyscall
+                 address mapping. This makes the mapping non-executable, but
+                 it still contains readable known contents, which could be
+                 used in certain rare security vulnerability exploits. This
+                 configuration is recommended when using legacy userspace
+                 that still uses vsyscalls along with legacy binary
+                 instrumentation tools that require code to be readable.
+
+                 An example of this type of legacy userspace is running
+                 Pin on an old binary that still uses vsyscalls.
+
+       config LEGACY_VSYSCALL_XONLY
+               bool "Emulate execution only"
+               help
+                 The kernel traps and emulates calls into the fixed vsyscall
+                 address mapping and does not allow reads.  This
+                 configuration is recommended when userspace might use the
+                 legacy vsyscall area but support for legacy binary
+                 instrumentation of legacy code is not needed.  It mitigates
+                 certain uses of the vsyscall area as an ASLR-bypassing
+                 buffer.
 
        config LEGACY_VSYSCALL_NONE
                bool "None"
index d9d81ad7a4009aac0bfb37cd0345366a5d206164..fedd7628f3a69b4a36d7e27b4a127e4234fded48 100644 (file)
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 
-static enum { EMULATE, NONE } vsyscall_mode =
+static enum { EMULATE, XONLY, NONE } vsyscall_mode =
 #ifdef CONFIG_LEGACY_VSYSCALL_NONE
        NONE;
+#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY)
+       XONLY;
 #else
        EMULATE;
 #endif
@@ -54,6 +56,8 @@ static int __init vsyscall_setup(char *str)
        if (str) {
                if (!strcmp("emulate", str))
                        vsyscall_mode = EMULATE;
+               else if (!strcmp("xonly", str))
+                       vsyscall_mode = XONLY;
                else if (!strcmp("none", str))
                        vsyscall_mode = NONE;
                else
@@ -357,12 +361,20 @@ void __init map_vsyscall(void)
        extern char __vsyscall_page;
        unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-       if (vsyscall_mode != NONE) {
+       /*
+        * For full emulation, the page needs to exist for real.  In
+        * execute-only mode, there is no PTE at all backing the vsyscall
+        * page.
+        */
+       if (vsyscall_mode == EMULATE) {
                __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
                             PAGE_KERNEL_VVAR);
                set_vsyscall_pgtable_user_bits(swapper_pg_dir);
        }
 
+       if (vsyscall_mode == XONLY)
+               gate_vma.vm_flags = VM_EXEC;
+
        BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
                     (unsigned long)VSYSCALL_ADDR);
 }