arm64: split thread_info from task stack
author Mark Rutland <mark.rutland@arm.com>
Thu, 3 Nov 2016 20:23:13 +0000 (20:23 +0000)
committer Catalin Marinas <catalin.marinas@arm.com>
Fri, 11 Nov 2016 18:25:46 +0000 (18:25 +0000)
This patch moves arm64's struct thread_info from the task stack into
task_struct. This protects thread_info from corruption in the case of
stack overflows, and makes its address harder to determine if stack
addresses are leaked, making a number of attacks more difficult. Precise
detection and handling of overflow is left for subsequent patches.

Largely, this involves changing code to store a pointer to the current
task_struct in sp_el0, and to acquire the thread_info from the
task_struct. Core code now implements current_thread_info(), and as
noted in <linux/sched.h> this relies on
offsetof(task_struct, thread_info) == 0, which is enforced by core code.
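
As an aside (not part of this patch), the arrangement can be summarised
with the following simplified C sketch; the definitions are condensed
from core code and from the new asm/current.h rather than quoted
verbatim:

  /* <linux/sched.h>: with THREAD_INFO_IN_TASK, thread_info sits at offset 0 */
  struct task_struct {
          struct thread_info      thread_info;    /* must remain the first member */
          /* ... */
          void                    *stack;         /* the (separate) task stack */
          /* ... */
  };

  /* arm64 <asm/current.h>: sp_el0 holds a pointer to the current task_struct */
  static __always_inline struct task_struct *get_current(void)
  {
          return (struct task_struct *)read_sysreg(sp_el0);
  }
  #define current get_current()

  /* core <linux/thread_info.h>: a task pointer doubles as a thread_info pointer */
  #define current_thread_info() ((struct thread_info *)current)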

This change means that the 'tsk' register used in entry.S now points to
a task_struct, rather than a thread_info as it used to. To make this
clear, the TI_* field offsets are renamed to TSK_TI_*, with asm-offsets
appropriately updated to account for the structural change.

Userspace clobbers sp_el0, and we can no longer restore its kernel value
(the current task) from the stack pointer. Instead, the current task is
cached in a per-cpu variable that we can safely access from early
assembly, as interrupts are disabled (and we are thus not preemptible).
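
In C terms the scheme is roughly the following (a condensed view of the
process.c hunk below; the entry-path read itself is done in assembly via
ldr_this_cpu, since no C code has run at that point):

  /* Per-cpu shadow of the current task, written at every context switch. */
  DEFINE_PER_CPU(struct task_struct *, __entry_task);

  static void entry_task_switch(struct task_struct *next)
  {
          __this_cpu_write(__entry_task, next);
  }

  /*
   * On exception entry from EL0, 'tsk' is reloaded from this shadow copy
   * rather than derived from the stack pointer; the per-cpu access is safe
   * because interrupts are still masked at that point.
   */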

Both secondary entry and idle are updated to stash the stack pointer and
the task pointer separately.
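
Condensing the smp.c and head.S hunks below, the secondary boot handshake
now looks roughly like this (the head.S side is shown as C-style
pseudocode in the comment):

  /* __cpu_up(): publish the idle task and its stack for the incoming CPU */
  secondary_data.task  = idle;
  secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
  __flush_dcache_area(&secondary_data, sizeof(secondary_data));

  /*
   * __secondary_switched (head.S) then, in effect, does:
   *      sp     = secondary_data.stack;  // CPU_BOOT_STACK
   *      sp_el0 = secondary_data.task;   // CPU_BOOT_TASK, i.e. tsk/current
   */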

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: James Morse <james.morse@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
arch/arm64/Kconfig
arch/arm64/include/asm/Kbuild
arch/arm64/include/asm/current.h [new file with mode: 0644]
arch/arm64/include/asm/smp.h
arch/arm64/include/asm/thread_info.h
arch/arm64/kernel/asm-offsets.c
arch/arm64/kernel/entry.S
arch/arm64/kernel/head.S
arch/arm64/kernel/process.c
arch/arm64/kernel/smp.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 77a807a844aca6a5d7f9dfb63686dd34a982ea50..0b8227f23eed615d6d15a9322c604933fabbf07d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -109,6 +109,7 @@ config ARM64
        select POWER_SUPPLY
        select SPARSE_IRQ
        select SYSCTL_EXCEPTION_TRACE
+       select THREAD_INFO_IN_TASK
        help
          ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 44e1d7f10add1f892fb839ae41bbaae4cfd76442..28196b18e3947c0d946b33bd69e5677b96c2ecd3 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,7 +1,6 @@
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += cputime.h
-generic-y += current.h
 generic-y += delay.h
 generic-y += div64.h
 generic-y += dma.h
diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h
new file mode 100644
index 0000000..f2bcbe2
--- /dev/null
+++ b/arch/arm64/include/asm/current.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_CURRENT_H
+#define __ASM_CURRENT_H
+
+#include <linux/compiler.h>
+
+#include <asm/sysreg.h>
+
+#ifndef __ASSEMBLY__
+
+struct task_struct;
+
+static __always_inline struct task_struct *get_current(void)
+{
+       return (struct task_struct *)read_sysreg(sp_el0);
+}
+
+#define current get_current()
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_CURRENT_H */
+
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 968b08de820d437c17ba4353119873cd8bcee726..a62db952ffcbbd266f3e5e1fbbd4681c4718e543 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -82,6 +82,7 @@ asmlinkage void secondary_start_kernel(void);
  */
 struct secondary_data {
        void *stack;
+       struct task_struct *task;
        long status;
 };
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index bce0f07483c10c85d4428addce812f6e4525f0ad..c17ad4d213d05d57b56495389d2e088c27334906 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -47,41 +47,17 @@ typedef unsigned long mm_segment_t;
 struct thread_info {
        unsigned long           flags;          /* low level flags */
        mm_segment_t            addr_limit;     /* address limit */
-       struct task_struct      *task;          /* main task structure */
        int                     preempt_count;  /* 0 => preemptable, <0 => bug */
-       int                     cpu;            /* cpu */
 };
 
 #define INIT_THREAD_INFO(tsk)                                          \
 {                                                                      \
-       .task           = &tsk,                                         \
-       .flags          = 0,                                            \
        .preempt_count  = INIT_PREEMPT_COUNT,                           \
        .addr_limit     = KERNEL_DS,                                    \
 }
 
 #define init_stack             (init_thread_union.stack)
 
-/*
- * how to get the thread information struct from C
- */
-static inline struct thread_info *current_thread_info(void) __attribute_const__;
-
-/*
- * struct thread_info can be accessed directly via sp_el0.
- *
- * We don't use read_sysreg() as we want the compiler to cache the value where
- * possible.
- */
-static inline struct thread_info *current_thread_info(void)
-{
-       unsigned long sp_el0;
-
-       asm ("mrs %0, sp_el0" : "=r" (sp_el0));
-
-       return (struct thread_info *)sp_el0;
-}
-
 #define thread_saved_pc(tsk)   \
        ((unsigned long)(tsk->thread.cpu_context.pc))
 #define thread_saved_sp(tsk)   \
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index d30b2321c0ee07cf6a7f6c0c2ae9a8e63db08c93..c2dc9fa4f09bef502f2582411e96cd99b5f48485 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -36,9 +36,10 @@ int main(void)
 {
   DEFINE(TSK_ACTIVE_MM,                offsetof(struct task_struct, active_mm));
   BLANK();
-  DEFINE(TI_FLAGS,             offsetof(struct thread_info, flags));
-  DEFINE(TI_PREEMPT,           offsetof(struct thread_info, preempt_count));
-  DEFINE(TI_ADDR_LIMIT,                offsetof(struct thread_info, addr_limit));
+  DEFINE(TSK_TI_FLAGS,         offsetof(struct task_struct, thread_info.flags));
+  DEFINE(TSK_TI_PREEMPT,       offsetof(struct task_struct, thread_info.preempt_count));
+  DEFINE(TSK_TI_ADDR_LIMIT,    offsetof(struct task_struct, thread_info.addr_limit));
+  DEFINE(TSK_STACK,            offsetof(struct task_struct, stack));
   BLANK();
   DEFINE(THREAD_CPU_CONTEXT,   offsetof(struct task_struct, thread.cpu_context));
   BLANK();
@@ -121,6 +122,7 @@ int main(void)
   DEFINE(TZ_DSTTIME,           offsetof(struct timezone, tz_dsttime));
   BLANK();
   DEFINE(CPU_BOOT_STACK,       offsetof(struct secondary_data, stack));
+  DEFINE(CPU_BOOT_TASK,                offsetof(struct secondary_data, task));
   BLANK();
 #ifdef CONFIG_KVM_ARM_HOST
   DEFINE(VCPU_CONTEXT,         offsetof(struct kvm_vcpu, arch.ctxt));
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 2d4c83bc1f811b030b8adebe030965c05166c04c..6349a8324b4f6be4c4599c84854e71074a972f0d 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -90,9 +90,8 @@
 
        .if     \el == 0
        mrs     x21, sp_el0
-       mov     tsk, sp
-       and     tsk, tsk, #~(THREAD_SIZE - 1)   // Ensure MDSCR_EL1.SS is clear,
-       ldr     x19, [tsk, #TI_FLAGS]           // since we can unmask debug
+       ldr_this_cpu    tsk, __entry_task, x20  // Ensure MDSCR_EL1.SS is clear,
+       ldr     x19, [tsk, #TSK_TI_FLAGS]       // since we can unmask debug
        disable_step_tsk x19, x20               // exceptions when scheduling.
 
        mov     x29, xzr                        // fp pointed to user-space
        add     x21, sp, #S_FRAME_SIZE
        get_thread_info tsk
        /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
-       ldr     x20, [tsk, #TI_ADDR_LIMIT]
+       ldr     x20, [tsk, #TSK_TI_ADDR_LIMIT]
        str     x20, [sp, #S_ORIG_ADDR_LIMIT]
        mov     x20, #TASK_SIZE_64
-       str     x20, [tsk, #TI_ADDR_LIMIT]
+       str     x20, [tsk, #TSK_TI_ADDR_LIMIT]
        /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
        .endif /* \el == 0 */
        mrs     x22, elr_el1
        .if     \el != 0
        /* Restore the task's original addr_limit. */
        ldr     x20, [sp, #S_ORIG_ADDR_LIMIT]
-       str     x20, [tsk, #TI_ADDR_LIMIT]
+       str     x20, [tsk, #TSK_TI_ADDR_LIMIT]
 
        /* No need to restore UAO, it will be restored from SPSR_EL1 */
        .endif
@@ -192,13 +191,14 @@ alternative_else_nop_endif
        mov     x19, sp                 // preserve the original sp
 
        /*
-        * Compare sp with the current thread_info, if the top
-        * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and
-        * should switch to the irq stack.
+        * Compare sp with the base of the task stack.
+        * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
+        * and should switch to the irq stack.
         */
-       and     x25, x19, #~(THREAD_SIZE - 1)
-       cmp     x25, tsk
-       b.ne    9998f
+       ldr     x25, [tsk, TSK_STACK]
+       eor     x25, x25, x19
+       and     x25, x25, #~(THREAD_SIZE - 1)
+       cbnz    x25, 9998f
 
        adr_this_cpu x25, irq_stack, x26
        mov     x26, #IRQ_STACK_START_SP
@@ -427,9 +427,9 @@ el1_irq:
        irq_handler
 
 #ifdef CONFIG_PREEMPT
-       ldr     w24, [tsk, #TI_PREEMPT]         // get preempt count
+       ldr     w24, [tsk, #TSK_TI_PREEMPT]     // get preempt count
        cbnz    w24, 1f                         // preempt count != 0
-       ldr     x0, [tsk, #TI_FLAGS]            // get flags
+       ldr     x0, [tsk, #TSK_TI_FLAGS]        // get flags
        tbz     x0, #TIF_NEED_RESCHED, 1f       // needs rescheduling?
        bl      el1_preempt
 1:
@@ -444,7 +444,7 @@ ENDPROC(el1_irq)
 el1_preempt:
        mov     x24, lr
 1:     bl      preempt_schedule_irq            // irq en/disable is done inside
-       ldr     x0, [tsk, #TI_FLAGS]            // get new tasks TI_FLAGS
+       ldr     x0, [tsk, #TSK_TI_FLAGS]        // get new tasks TI_FLAGS
        tbnz    x0, #TIF_NEED_RESCHED, 1b       // needs rescheduling?
        ret     x24
 #endif
@@ -674,8 +674,7 @@ ENTRY(cpu_switch_to)
        ldp     x29, x9, [x8], #16
        ldr     lr, [x8]
        mov     sp, x9
-       and     x9, x9, #~(THREAD_SIZE - 1)
-       msr     sp_el0, x9
+       msr     sp_el0, x1
        ret
 ENDPROC(cpu_switch_to)
 
@@ -686,7 +685,7 @@ ENDPROC(cpu_switch_to)
 ret_fast_syscall:
        disable_irq                             // disable interrupts
        str     x0, [sp, #S_X0]                 // returned x0
-       ldr     x1, [tsk, #TI_FLAGS]            // re-check for syscall tracing
+       ldr     x1, [tsk, #TSK_TI_FLAGS]        // re-check for syscall tracing
        and     x2, x1, #_TIF_SYSCALL_WORK
        cbnz    x2, ret_fast_syscall_trace
        and     x2, x1, #_TIF_WORK_MASK
@@ -706,14 +705,14 @@ work_pending:
 #ifdef CONFIG_TRACE_IRQFLAGS
        bl      trace_hardirqs_on               // enabled while in userspace
 #endif
-       ldr     x1, [tsk, #TI_FLAGS]            // re-check for single-step
+       ldr     x1, [tsk, #TSK_TI_FLAGS]        // re-check for single-step
        b       finish_ret_to_user
 /*
  * "slow" syscall return path.
  */
 ret_to_user:
        disable_irq                             // disable interrupts
-       ldr     x1, [tsk, #TI_FLAGS]
+       ldr     x1, [tsk, #TSK_TI_FLAGS]
        and     x2, x1, #_TIF_WORK_MASK
        cbnz    x2, work_pending
 finish_ret_to_user:
@@ -746,7 +745,7 @@ el0_svc_naked:                                      // compat entry point
        enable_dbg_and_irq
        ct_user_exit 1
 
-       ldr     x16, [tsk, #TI_FLAGS]           // check for syscall hooks
+       ldr     x16, [tsk, #TSK_TI_FLAGS]       // check for syscall hooks
        tst     x16, #_TIF_SYSCALL_WORK
        b.ne    __sys_trace
        cmp     scno, sc_nr                     // check upper syscall limit
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 332e33193ccf1575727dfc8883644a5cfabd0e08..eaafb253bbfa58ed2fff02d636dcaed8f9c41f24 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -428,7 +428,8 @@ ENDPROC(__create_page_tables)
 __primary_switched:
        adrp    x4, init_thread_union
        add     sp, x4, #THREAD_SIZE
-       msr     sp_el0, x4                      // Save thread_info
+       adr_l   x5, init_task
+       msr     sp_el0, x5                      // Save thread_info
 
        adr_l   x8, vectors                     // load VBAR_EL1 with virtual
        msr     vbar_el1, x8                    // vector table address
@@ -699,10 +700,10 @@ __secondary_switched:
        isb
 
        adr_l   x0, secondary_data
-       ldr     x0, [x0, #CPU_BOOT_STACK]       // get secondary_data.stack
-       mov     sp, x0
-       and     x0, x0, #~(THREAD_SIZE - 1)
-       msr     sp_el0, x0                      // save thread_info
+       ldr     x1, [x0, #CPU_BOOT_STACK]       // get secondary_data.stack
+       mov     sp, x1
+       ldr     x2, [x0, #CPU_BOOT_TASK]
+       msr     sp_el0, x2
        mov     x29, #0
        b       secondary_start_kernel
 ENDPROC(__secondary_switched)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index ec7b9c00effe1b3fcbb1d8cfbdc91c7f92d261ab..a98b743631c5b3e1b5df3c92e5f092353c1e71d3 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -45,6 +45,7 @@
 #include <linux/personality.h>
 #include <linux/notifier.h>
 #include <trace/events/power.h>
+#include <linux/percpu.h>
 
 #include <asm/alternative.h>
 #include <asm/compat.h>
@@ -321,6 +322,20 @@ void uao_thread_switch(struct task_struct *next)
        }
 }
 
+/*
+ * We store our current task in sp_el0, which is clobbered by userspace. Keep a
+ * shadow copy so that we can restore this upon entry from userspace.
+ *
+ * This is *only* for exception entry from EL0, and is not valid until we
+ * __switch_to() a user task.
+ */
+DEFINE_PER_CPU(struct task_struct *, __entry_task);
+
+static void entry_task_switch(struct task_struct *next)
+{
+       __this_cpu_write(__entry_task, next);
+}
+
 /*
  * Thread switching.
  */
@@ -333,6 +348,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
        tls_thread_switch(next);
        hw_breakpoint_thread_switch(next);
        contextidr_thread_switch(next);
+       entry_task_switch(next);
        uao_thread_switch(next);
 
        /*
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6f42c68e457fcf4b461ace2c1d7f42bfcab36386..cb87234cfcf2d2b8ba75f9e83dadf78c927a3e86 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -149,6 +149,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
         * We need to tell the secondary core where to find its stack and the
         * page tables.
         */
+       secondary_data.task = idle;
        secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
        update_cpu_boot_status(CPU_MMU_OFF);
        __flush_dcache_area(&secondary_data, sizeof(secondary_data));
@@ -173,6 +174,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
                pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
        }
 
+       secondary_data.task = NULL;
        secondary_data.stack = NULL;
        status = READ_ONCE(secondary_data.status);
        if (ret && status) {