mm/core, x86/mm/pkeys: Differentiate instruction fetches

[linux-2.6-block.git] / arch / x86 / mm / fault.c
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c

index 3c51c66b65fc804228f32e004ef4a506613bba51..d81744e6f39f24d3dc36c50f550cebf977533c9c 100644 (file)
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -15,12 +15,14 @@
  #include <linux/context_tracking.h>    /* exception_enter(), ...       */
  #include <linux/uaccess.h>             /* faulthandler_disabled()      */
  
+#include <asm/cpufeature.h>            /* boot_cpu_has, ...            */
  #include <asm/traps.h>                 /* dotraplinkage, ...           */
  #include <asm/pgalloc.h>               /* pgd_*(), ...                 */
  #include <asm/kmemcheck.h>             /* kmemcheck_*(), ...           */
  #include <asm/fixmap.h>                        /* VSYSCALL_ADDR                */
  #include <asm/vsyscall.h>              /* emulate_vsyscall             */
  #include <asm/vm86.h>                  /* struct vm86                  */
+#include <asm/mmu_context.h>           /* vma_pkey()                   */
  
  #define CREATE_TRACE_POINTS
  #include <asm/trace/exceptions.h>
@@ -169,6 +171,56 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
         return prefetch;
  }
  
+/*
+ * A protection key fault means that the PKRU value did not allow
+ * access to some PTE.  Userspace can figure out what PKRU was
+ * from the XSAVE state, and this function fills in the si_pkey
+ * field of siginfo so userspace can discover which protection
+ * key was set on the PTE.
+ *
+ * If we get here, the fault was classified as a pkey fault
+ * (PF_PK set, or the VMA's pkey denies the access) and there
+ * was a VMA once we got in the fault handler.  That does *not*
+ * guarantee that the VMA we find here is the one we faulted on.
+ *
+ * 1. T1   : mprotect_key(foo, PAGE_SIZE, pkey=4);
+ * 2. T1   : set PKRU to deny access to pkey=4, touches page
+ * 3. T1   : faults...
+ * 4.    T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
+ * 5. T1   : enters fault handler, takes mmap_sem, etc...
+ * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
+ *          faulted on a pte with its pkey=4.
+ */
+static void fill_sig_info_pkey(int si_code, siginfo_t *info,
+               struct vm_area_struct *vma)
+{
+       /* This is effectively an #ifdef */
+       if (!boot_cpu_has(X86_FEATURE_OSPKE))
+               return;
+
+       /*
+        * Not a protection-key fault: nothing to do.  NOTE(review): only
+        * si_code is tested, not the signal number - confirm that is safe.
+        */
+       if (si_code != SEGV_PKUERR)
+               return;
+       /*
+        * force_sig_info_fault() has callers with and without a VMA, but
+        * PF_PK handling only happens with a valid VMA: NULL is unexpected.
+        */
+       if (!vma) {
+               WARN_ONCE(1, "PKU fault with no VMA passed in");
+               info->si_pkey = 0;
+               return;
+       }
+       /*
+        * si_pkey should be thought of as a strong hint, but not
+        * absolutely guaranteed to be 100% accurate because of
+        * the race explained above.
+        */
+       info->si_pkey = vma_pkey(vma);
+}
+
  static void
  force_sig_info_fault(int si_signo, int si_code, unsigned long address,
                      struct task_struct *tsk, struct vm_area_struct *vma,
@@ -187,6 +239,8 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
                 lsb = PAGE_SHIFT;
         info.si_addr_lsb = lsb;
  
+       fill_sig_info_pkey(si_code, &info, vma);
+
         force_sig_info(si_signo, &info, tsk);
  }
  
@@ -843,11 +897,36 @@ bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
         __bad_area(regs, error_code, address, NULL, SEGV_MAPERR);
  }
  
+static inline bool bad_area_access_from_pkeys(unsigned long error_code,
+               struct vm_area_struct *vma)
+{
+       /* Always called on the current mm, so never a foreign access */
+       bool foreign = false;
+
+       if (!boot_cpu_has(X86_FEATURE_OSPKE))
+               return false;
+       if (error_code & PF_PK)
+               return true;
+       /* No PF_PK bit: this checks the protection keys on the VMA: */
+       if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
+                               (error_code & PF_INSTR), foreign))
+               return true;
+       return false;
+}
+
  static noinline void
  bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
                       unsigned long address, struct vm_area_struct *vma)
  {
-       __bad_area(regs, error_code, address, vma, SEGV_ACCERR);
+       /*
+        * The OSPKE check in bad_area_access_from_pkeys() is not
+        * strictly necessary at runtime.  But, doing it this way
+        * allows compiler optimizations if pkeys are compiled out.
+        */
+       if (bad_area_access_from_pkeys(error_code, vma))
+               __bad_area(regs, error_code, address, vma, SEGV_PKUERR);
+       else
+               __bad_area(regs, error_code, address, vma, SEGV_ACCERR);
  }
  
  static void
@@ -1019,6 +1098,25 @@ int show_unhandled_signals = 1;
  static inline int
  access_error(unsigned long error_code, struct vm_area_struct *vma)
  {
+       /* This is only called for the current mm, so: */
+       bool foreign = false;
+       /*
+        * A read or write was blocked by protection keys. We do
+        * this check before any others because we do not want
+        * to, for instance, confuse a protection-key-denied
+        * write with one for which we should do a COW.
+        */
+       if (error_code & PF_PK)
+               return 1;
+       /*
+        * Make sure to check the VMA so that we do not perform
+        * faults just to hit a PF_PK as soon as we fill in a
+        * page.
+        */
+       if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
+                               (error_code & PF_INSTR), foreign))
+               return 1;
+
         if (error_code & PF_WRITE) {
                 /* write, present and write, not present: */
                 if (unlikely(!(vma->vm_flags & VM_WRITE)))
@@ -1171,6 +1269,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
  
         if (error_code & PF_WRITE)
                 flags |= FAULT_FLAG_WRITE;
+       if (error_code & PF_INSTR)
+               flags |= FAULT_FLAG_INSTRUCTION;
  
         /*
          * When running in the kernel we expect faults to occur only to