Merge branches 'acpi-pm' and 'pm-sleep'

diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 69a983365392961358ddbb719a9ab7fe5f4e2965..ce38f165489b5a13d92091c8671879f30ce44e20 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -38,6 +38,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/hypervisor.h>
+#include <asm/vsyscall.h>
 #include <asm/cmdline.h>
 #include <asm/pti.h>
 #include <asm/pgtable.h>
@@ -48,15 +49,20 @@
 #undef pr_fmt
 #define pr_fmt(fmt)     "Kernel/User page tables isolation: " fmt
 
+/* Backporting helper: define __GFP_NOTRACK away on kernels that lack it */
+#ifndef __GFP_NOTRACK
+#define __GFP_NOTRACK  0
+#endif
+
 static void __init pti_print_if_insecure(const char *reason)
 {
-       if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+       if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                pr_info("%s\n", reason);
 }
 
 static void __init pti_print_if_secure(const char *reason)
 {
-       if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+       if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                pr_info("%s\n", reason);
 }
 
@@ -90,7 +96,7 @@ void __init pti_check_boottime_disable(void)
        }
 
 autosel:
-       if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+       if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                return;
 enable:
        setup_force_cpu_cap(X86_FEATURE_PTI);
@@ -137,6 +143,236 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
        return pgd;
 }
 
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a P4D on success, or NULL on failure.
+ */
+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+{
+       pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
+       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
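+       /* The user page-table copy mirrors only kernel addresses. */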
+       if (address < PAGE_OFFSET) {
+               WARN_ONCE(1, "attempt to walk user address\n");
+               return NULL;
+       }
+
+       if (pgd_none(*pgd)) {
+               unsigned long new_p4d_page = __get_free_page(gfp);
+               if (!new_p4d_page)
+                       return NULL;
+
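+               /* Install the new p4d page as a regular kernel page-table entry. */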
+               set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
+       }
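+       /* x86 has no large pages at the PGD level; build-time sanity check only. */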
+       BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
+       return p4d_offset(pgd, address);
+}
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a PMD on success, or NULL on failure.
+ */
+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+{
+       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+       p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
+       pud_t *pud;
+
+       /* The p4d walk above can fail to allocate; don't dereference NULL. */
+       if (!p4d)
+               return NULL;
+
+       BUILD_BUG_ON(p4d_large(*p4d) != 0);
+       if (p4d_none(*p4d)) {
+               unsigned long new_pud_page = __get_free_page(gfp);
+               if (!new_pud_page)
+                       return NULL;
+
+               set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
+       }
+
+       pud = pud_offset(p4d, address);
+       /* The user page tables do not use large mappings: */
+       if (pud_large(*pud)) {
+               WARN_ON(1);
+               return NULL;
+       }
+       if (pud_none(*pud)) {
+               unsigned long new_pmd_page = __get_free_page(gfp);
+               if (!new_pmd_page)
+                       return NULL;
+
+               set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+       }
+
+       return pmd_offset(pud, address);
+}
+
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+/*
+ * Walk the shadow copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.  Does not support large pages.
+ *
+ * Note: this is only used when mapping *new* kernel data into the
+ * user/shadow page tables.  It is never used for userspace data.
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+{
+       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+       pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
+       pte_t *pte;
+
+       /* The pmd walk above returns NULL on allocation failure. */
+       if (!pmd)
+               return NULL;
+
+       /* We can't do anything sensible if we hit a large mapping. */
+       if (pmd_large(*pmd)) {
+               WARN_ON(1);
+               return NULL;
+       }
+
+       if (pmd_none(*pmd)) {
+               unsigned long new_pte_page = __get_free_page(gfp);
+               if (!new_pte_page)
+                       return NULL;
+
+               set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+       }
+
+       pte = pte_offset_kernel(pmd, address);
+       if (pte_flags(*pte) & _PAGE_USER) {
+               WARN_ONCE(1, "attempt to walk to user pte\n");
+               return NULL;
+       }
+       return pte;
+}
+
+static void __init pti_setup_vsyscall(void)
+{
+       pte_t *pte, *target_pte;
+       unsigned int level;
+
+       pte = lookup_address(VSYSCALL_ADDR, &level);
+       if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
+               return;
+
+       target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
+       if (WARN_ON(!target_pte))
+               return;
+
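+       /*
+        * Point the user tables at the kernel's vsyscall PTE and make the
+        * page-table levels leading down to it user-accessible.
+        */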
+       *target_pte = *pte;
+       set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
+}
+#else
+static void __init pti_setup_vsyscall(void) { }
+#endif
+
+static void __init
+pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
+{
+       unsigned long addr;
+
+       /*
+        * Clone the populated PMDs which cover start to end. These PMD areas
+        * can have holes.
+        */
+       for (addr = start; addr < end; addr += PMD_SIZE) {
+               pmd_t *pmd, *target_pmd;
+               pgd_t *pgd;
+               p4d_t *p4d;
+               pud_t *pud;
+
+               pgd = pgd_offset_k(addr);
+               if (WARN_ON(pgd_none(*pgd)))
+                       return;
+               p4d = p4d_offset(pgd, addr);
+               if (WARN_ON(p4d_none(*p4d)))
+                       return;
+               pud = pud_offset(p4d, addr);
+               if (pud_none(*pud))
+                       continue;
+               pmd = pmd_offset(pud, addr);
+               if (pmd_none(*pmd))
+                       continue;
+
+               target_pmd = pti_user_pagetable_walk_pmd(addr);
+               if (WARN_ON(!target_pmd))
+                       return;
+
+               /*
+                * Copy the PMD.  That is, the kernel-mode and user-mode
+                * tables will share the last-level page tables of this
+                * address range.
+                */
+               *target_pmd = pmd_clear_flags(*pmd, clear);
+       }
+}
+
+/*
+ * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
+ * next-level entry on 5-level systems).
+ */
+static void __init pti_clone_p4d(unsigned long addr)
+{
+       p4d_t *kernel_p4d, *user_p4d;
+       pgd_t *kernel_pgd;
+
+       user_p4d = pti_user_pagetable_walk_p4d(addr);
+       if (WARN_ON(!user_p4d))
+               return;
+
+       kernel_pgd = pgd_offset_k(addr);
+       kernel_p4d = p4d_offset(kernel_pgd, addr);
+       *user_p4d = *kernel_p4d;
+}
+
+/*
+ * Clone the CPU_ENTRY_AREA into the user space visible page table.
+ */
+static void __init pti_clone_user_shared(void)
+{
+       pti_clone_p4d(CPU_ENTRY_AREA_BASE);
+}
+
+/*
+ * Clone the ESPFIX P4D into the user space visible page table.
+ */
+static void __init pti_setup_espfix64(void)
+{
+#ifdef CONFIG_X86_ESPFIX64
+       pti_clone_p4d(ESPFIX_BASE_ADDR);
+#endif
+}
+
+/*
+ * Clone the populated PMDs of the entry and irqentry text and force them RO.
+ */
+static void __init pti_clone_entry_text(void)
+{
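+       /*
+        * The user copy shares the kernel's entry-text page tables, but
+        * with _PAGE_RW and _PAGE_GLOBAL cleared: read-only and non-global.
+        */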
+       pti_clone_pmds((unsigned long) __entry_text_start,
+                       (unsigned long) __irqentry_text_end,
+                      _PAGE_RW | _PAGE_GLOBAL);
+}
+
 /*
  * Initialize kernel page table isolation
  */
@@ -146,4 +382,13 @@ void __init pti_init(void)
                return;
 
        pr_info("enabled\n");
+
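+       /*
+        * Populate the user page tables with everything the entry code
+        * needs: cpu_entry_area, entry/irqentry text, espfix64 and vsyscall.
+        */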
+       pti_clone_user_shared();
+       pti_clone_entry_text();
+       pti_setup_espfix64();
+       pti_setup_vsyscall();
 }