Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 16 May 2016 22:15:17 +0000 (15:15 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 16 May 2016 22:15:17 +0000 (15:15 -0700)
Pull x86 asm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - MSR access API fixes and enhancements (Andy Lutomirski)

   - early exception handling improvements (Andy Lutomirski)

   - user-space FS/GS prctl usage fixes and improvements (Andy
     Lutomirski)

   - Remove the cpu_has_*() APIs and replace them with equivalents
     (Borislav Petkov)

   - task switch micro-optimization (Brian Gerst)

   - 32-bit entry code simplification (Denys Vlasenko)

   - enhance PAT handling in emulated CPUs (Toshi Kani)

  ... and lots of other cleanups/fixlets"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
  x86/arch_prctl/64: Restore accidentally removed put_cpu() in ARCH_SET_GS
  x86/entry/32: Remove asmlinkage_protect()
  x86/entry/32: Remove GET_THREAD_INFO() from entry code
  x86/entry, sched/x86: Don't save/restore EFLAGS on task switch
  x86/asm/entry/32: Simplify pushes of zeroed pt_regs->REGs
  selftests/x86/ldt_gdt: Test set_thread_area() deletion of an active segment
  x86/tls: Synchronize segment registers in set_thread_area()
  x86/asm/64: Rename thread_struct's fs and gs to fsbase and gsbase
  x86/arch_prctl/64: Remove FSBASE/GSBASE < 4G optimization
  x86/segments/64: When load_gs_index fails, clear the base
  x86/segments/64: When loadsegment(fs, ...) fails, clear the base
  x86/asm: Make asm/alternative.h safe from assembly
  x86/asm: Stop depending on ptrace.h in alternative.h
  x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()
  x86/asm: Make sure verify_cpu() has a good stack
  x86/extable: Add a comment about early exception handlers
  x86/msr: Set the return value to zero when native_rdmsr_safe() fails
  x86/paravirt: Make "unsafe" MSR accesses unsafe even if PARAVIRT=y
  x86/paravirt: Add paravirt_{read,write}_msr()
  x86/msr: Carry on after a non-"safe" MSR access fails
  ...

127 files changed:
Documentation/x86/pat.txt
arch/ia64/include/asm/iommu.h
arch/x86/crypto/aesni-intel_glue.c
arch/x86/crypto/camellia_aesni_avx2_glue.c
arch/x86/crypto/camellia_aesni_avx_glue.c
arch/x86/crypto/chacha20_glue.c
arch/x86/crypto/poly1305_glue.c
arch/x86/crypto/serpent_avx2_glue.c
arch/x86/crypto/serpent_sse2_glue.c
arch/x86/crypto/sha1_ssse3_glue.c
arch/x86/crypto/sha256_ssse3_glue.c
arch/x86/crypto/sha512_ssse3_glue.c
arch/x86/entry/common.c
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/entry/syscalls/syscall_64.tbl
arch/x86/entry/vdso/vclock_gettime.c
arch/x86/entry/vdso/vdso-layout.lds.S
arch/x86/entry/vdso/vma.c
arch/x86/events/core.c
arch/x86/events/intel/uncore.c
arch/x86/ia32/ia32_signal.c
arch/x86/include/asm/alternative.h
arch/x86/include/asm/apic.h
arch/x86/include/asm/clocksource.h
arch/x86/include/asm/compat.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/elf.h
arch/x86/include/asm/hugetlb.h
arch/x86/include/asm/irq_work.h
arch/x86/include/asm/kgdb.h
arch/x86/include/asm/linkage.h
arch/x86/include/asm/msr.h
arch/x86/include/asm/mtrr.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/pat.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/segment.h
arch/x86/include/asm/setup.h
arch/x86/include/asm/switch_to.h
arch/x86/include/asm/text-patching.h [new file with mode: 0644]
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/tsc.h
arch/x86/include/asm/uaccess.h
arch/x86/include/asm/xor_32.h
arch/x86/include/asm/xor_avx.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/alternative.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/apic_noop.c
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/ipi.c
arch/x86/kernel/apic/vector.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cyrix.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/mcheck/mce_intel.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpu/mtrr/cyrix.c
arch/x86/kernel/cpu/mtrr/generic.c
arch/x86/kernel/cpu/mtrr/main.c
arch/x86/kernel/cpu/mtrr/mtrr.h
arch/x86/kernel/cpu/vmware.c
arch/x86/kernel/devicetree.c
arch/x86/kernel/fpu/bugs.c
arch/x86/kernel/fpu/core.c
arch/x86/kernel/fpu/init.c
arch/x86/kernel/fpu/regset.c
arch/x86/kernel/fpu/xstate.c
arch/x86/kernel/head_32.S
arch/x86/kernel/head_64.S
arch/x86/kernel/hpet.c
arch/x86/kernel/jump_label.c
arch/x86/kernel/kgdb.c
arch/x86/kernel/kprobes/core.c
arch/x86/kernel/kprobes/opt.c
arch/x86/kernel/kvm.c
arch/x86/kernel/module.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/process_64.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/signal.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/tce_64.c
arch/x86/kernel/tls.c
arch/x86/kernel/traps.c
arch/x86/kernel/tsc.c
arch/x86/kernel/uprobes.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/trace.h
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/usercopy_32.c
arch/x86/mm/extable.c
arch/x86/mm/hugetlbpage.c
arch/x86/mm/init.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/ioremap.c
arch/x86/mm/pageattr.c
arch/x86/mm/pat.c
arch/x86/oprofile/nmi_int.c
arch/x86/oprofile/op_model_ppro.c
arch/x86/pci/xen.c
arch/x86/power/hibernate_32.c
arch/x86/xen/enlighten.c
drivers/cpufreq/longhaul.c
drivers/gpu/drm/drm_cache.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/input/joystick/analog.c
drivers/iommu/irq_remapping.c
drivers/lguest/x86/core.c
drivers/net/hamradio/baycom_epp.c
drivers/staging/unisys/visorbus/visorchipset.c
include/asm-generic/vmlinux.lds.h
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/fsgsbase.c [new file with mode: 0644]
tools/testing/selftests/x86/ldt_gdt.c

index 54944c71b819bd7b37aeb4d221ef02fffd201879..2a4ee6302122f8942ac08f6d26dbdf2f369f4e6e 100644 (file)
@@ -196,3 +196,35 @@ Another, more verbose way of getting PAT related debug messages is with
 "debugpat" boot parameter. With this parameter, various debug messages are
 printed to dmesg log.
 
+PAT Initialization
+------------------
+
+The following table describes how PAT is initialized under various
+configurations. The PAT MSR must be updated by Linux in order to support WC
+and WT attributes. Otherwise, the PAT MSR has the value programmed in it
+by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests.
+
+ MTRR PAT   Call Sequence               PAT State  PAT MSR
+ =========================================================
+ E    E     MTRR -> PAT init            Enabled    OS
+ E    D     MTRR -> PAT init            Disabled    -
+ D    E     MTRR -> PAT disable         Disabled   BIOS
+ D    D     MTRR -> PAT disable         Disabled    -
+ -    np/E  PAT  -> PAT disable         Disabled   BIOS
+ -    np/D  PAT  -> PAT disable         Disabled    -
+ E    !P/E  MTRR -> PAT init            Disabled   BIOS
+ D    !P/E  MTRR -> PAT disable         Disabled   BIOS
+ !M   !P/E  MTRR stub -> PAT disable    Disabled   BIOS
+
+ Legend
+ ------------------------------------------------
+ E         Feature enabled in CPU
+ D         Feature disabled/unsupported in CPU
+ np        "nopat" boot option specified
+ !P        CONFIG_X86_PAT option unset
+ !M        CONFIG_MTRR option unset
+ Enabled   PAT state set to enabled
+ Disabled  PAT state set to disabled
+ OS        PAT initializes PAT MSR with OS setting
+ BIOS      PAT keeps PAT MSR with BIOS setting
+
index 105c93b00b1bc53ce22f04ffd249595b5001c4d2..1d1212901ae70138466b7b32a6e04538d8f73a9d 100644 (file)
@@ -1,7 +1,6 @@
 #ifndef _ASM_IA64_IOMMU_H
 #define _ASM_IA64_IOMMU_H 1
 
-#define cpu_has_x2apic 0
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
 
index 064c7e2bd7c8e8b9c10e709273217625a17ac6fe..5b7fa14710073bdc6902de2775d27c3f3ed53bb3 100644 (file)
@@ -1477,7 +1477,7 @@ static int __init aesni_init(void)
        }
        aesni_ctr_enc_tfm = aesni_ctr_enc;
 #ifdef CONFIG_AS_AVX
-       if (cpu_has_avx) {
+       if (boot_cpu_has(X86_FEATURE_AVX)) {
                /* optimize performance of ctr mode encryption transform */
                aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm;
                pr_info("AES CTR mode by8 optimization enabled\n");
index d844569245633845da1fe485dca9c44043be680f..60907c139c4e2a7842f412ebcbfd744085cb6020 100644 (file)
@@ -562,7 +562,10 @@ static int __init camellia_aesni_init(void)
 {
        const char *feature_name;
 
-       if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+       if (!boot_cpu_has(X86_FEATURE_AVX) ||
+           !boot_cpu_has(X86_FEATURE_AVX2) ||
+           !boot_cpu_has(X86_FEATURE_AES) ||
+           !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
                pr_info("AVX2 or AES-NI instructions are not detected.\n");
                return -ENODEV;
        }
index 93d8f295784e399c2fc363d8b5602fede9039529..d96429da88eb8bf274620b79de372b2ac72adec4 100644 (file)
@@ -554,7 +554,9 @@ static int __init camellia_aesni_init(void)
 {
        const char *feature_name;
 
-       if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+       if (!boot_cpu_has(X86_FEATURE_AVX) ||
+           !boot_cpu_has(X86_FEATURE_AES) ||
+           !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
                pr_info("AVX or AES-NI instructions are not detected.\n");
                return -ENODEV;
        }
index 8baaff5af0b572b27e9c488083d2d751216775d9..2d5c2e0bd939b9e267102adf3577a91e53abfce1 100644 (file)
@@ -129,7 +129,8 @@ static int __init chacha20_simd_mod_init(void)
                return -ENODEV;
 
 #ifdef CONFIG_AS_AVX2
-       chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+       chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+                           boot_cpu_has(X86_FEATURE_AVX2) &&
                            cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
 #endif
        return crypto_register_alg(&alg);
index 4264a3d595894b1a8e8bbba635f477878b0461dd..e32142bc071d9344533f39d2a6ee5326845b62f2 100644 (file)
@@ -179,11 +179,12 @@ static struct shash_alg alg = {
 
 static int __init poly1305_simd_mod_init(void)
 {
-       if (!cpu_has_xmm2)
+       if (!boot_cpu_has(X86_FEATURE_XMM2))
                return -ENODEV;
 
 #ifdef CONFIG_AS_AVX2
-       poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+       poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+                           boot_cpu_has(X86_FEATURE_AVX2) &&
                            cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
        alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
        if (poly1305_use_avx2)
index 6d198342e2de4951635c46aa3486c88cbc793274..870f6d812a2dd251392498ce10fabef37a3826ff 100644 (file)
@@ -538,7 +538,7 @@ static int __init init(void)
 {
        const char *feature_name;
 
-       if (!cpu_has_avx2 || !cpu_has_osxsave) {
+       if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
                pr_info("AVX2 instructions are not detected.\n");
                return -ENODEV;
        }
index 8943407e8917a343658c0a3c81ed9ec0dc6b1735..644f97ab8cace2910d91cf9abb798f044aef0c1f 100644 (file)
@@ -600,7 +600,7 @@ static struct crypto_alg serpent_algs[10] = { {
 
 static int __init serpent_sse2_init(void)
 {
-       if (!cpu_has_xmm2) {
+       if (!boot_cpu_has(X86_FEATURE_XMM2)) {
                printk(KERN_INFO "SSE2 instructions are not detected.\n");
                return -ENODEV;
        }
index dd14616b773970d13c2886f255c0f76b4eb58450..1024e378a358f69ef49864a0014cd6af5aed3523 100644 (file)
@@ -166,7 +166,7 @@ static struct shash_alg sha1_avx_alg = {
 static bool avx_usable(void)
 {
        if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-               if (cpu_has_avx)
+               if (boot_cpu_has(X86_FEATURE_AVX))
                        pr_info("AVX detected but unusable.\n");
                return false;
        }
index 5f4d6086dc5913be7d680882ab94d2286ad3e230..3ae0f43ebd376527ec450f8a2dc16eee98f17fe9 100644 (file)
@@ -201,7 +201,7 @@ static struct shash_alg sha256_avx_algs[] = { {
 static bool avx_usable(void)
 {
        if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-               if (cpu_has_avx)
+               if (boot_cpu_has(X86_FEATURE_AVX))
                        pr_info("AVX detected but unusable.\n");
                return false;
        }
index 34e5083d6f36540e967dc755384012ca35afd714..0b17c83d027ddcac6c211b2848486096a84cdbea 100644 (file)
@@ -151,7 +151,7 @@ asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
 static bool avx_usable(void)
 {
        if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-               if (cpu_has_avx)
+               if (boot_cpu_has(X86_FEATURE_AVX))
                        pr_info("AVX detected but unusable.\n");
                return false;
        }
index e79d93d44ecd9c66b1e29078a11aa1ee405c0fa2..ec138e538c44f9acf7f829827d14a3991473763f 100644 (file)
@@ -191,7 +191,7 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
 
 long syscall_trace_enter(struct pt_regs *regs)
 {
-       u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+       u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
        unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
 
        if (phase1_result == 0)
index 10868aa734dc07e9a438bcdd3d4c7b4aae0106d3..983e5d3a0d271c387e24371ddc3fcd6e7110a27d 100644 (file)
 ENTRY(ret_from_fork)
        pushl   %eax
        call    schedule_tail
-       GET_THREAD_INFO(%ebp)
        popl    %eax
-       pushl   $0x0202                         # Reset kernel eflags
-       popfl
 
        /* When we fork, we trace the syscall return in the child, too. */
        movl    %esp, %eax
@@ -221,10 +218,7 @@ END(ret_from_fork)
 ENTRY(ret_from_kernel_thread)
        pushl   %eax
        call    schedule_tail
-       GET_THREAD_INFO(%ebp)
        popl    %eax
-       pushl   $0x0202                         # Reset kernel eflags
-       popfl
        movl    PT_EBP(%esp), %eax
        call    *PT_EBX(%esp)
        movl    $0, PT_EAX(%esp)
@@ -251,7 +245,6 @@ ENDPROC(ret_from_kernel_thread)
 ret_from_exception:
        preempt_stop(CLBR_ANY)
 ret_from_intr:
-       GET_THREAD_INFO(%ebp)
 #ifdef CONFIG_VM86
        movl    PT_EFLAGS(%esp), %eax           # mix EFLAGS and CS
        movb    PT_CS(%esp), %al
index 858b555e274b8d763d97d9b9cf14998125bce563..9ee0da1807edff462536e3628e1db217bb835cd8 100644 (file)
@@ -372,9 +372,6 @@ END(ptregs_\func)
 ENTRY(ret_from_fork)
        LOCK ; btr $TIF_FORK, TI_flags(%r8)
 
-       pushq   $0x0002
-       popfq                                   /* reset kernel eflags */
-
        call    schedule_tail                   /* rdi: 'prev' task parameter */
 
        testb   $3, CS(%rsp)                    /* from kernel_thread? */
@@ -781,19 +778,25 @@ ENTRY(native_load_gs_index)
        pushfq
        DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
        SWAPGS
-gs_change:
+.Lgs_change:
        movl    %edi, %gs
-2:     mfence                                  /* workaround */
+2:     ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
        SWAPGS
        popfq
        ret
 END(native_load_gs_index)
 
-       _ASM_EXTABLE(gs_change, bad_gs)
+       _ASM_EXTABLE(.Lgs_change, bad_gs)
        .section .fixup, "ax"
        /* running with kernelgs */
 bad_gs:
        SWAPGS                                  /* switch back to user gs */
+.macro ZAP_GS
+       /* This can't be a string because the preprocessor needs to see it. */
+       movl $__USER_DS, %eax
+       movl %eax, %gs
+.endm
+       ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG
        xorl    %eax, %eax
        movl    %eax, %gs
        jmp     2b
@@ -1019,13 +1022,13 @@ ENTRY(error_entry)
        movl    %ecx, %eax                      /* zero extend */
        cmpq    %rax, RIP+8(%rsp)
        je      .Lbstep_iret
-       cmpq    $gs_change, RIP+8(%rsp)
+       cmpq    $.Lgs_change, RIP+8(%rsp)
        jne     .Lerror_entry_done
 
        /*
-        * hack: gs_change can fail with user gsbase.  If this happens, fix up
+        * hack: .Lgs_change can fail with user gsbase.  If this happens, fix up
         * gsbase and proceed.  We'll fix up the exception and land in
-        * gs_change's error handler with kernel gsbase.
+        * .Lgs_change's error handler with kernel gsbase.
         */
        jmp     .Lerror_entry_from_usermode_swapgs
 
index 847f2f0c31e50d1029a2e39600ed86e772ef8847..e1721dafbcb13fab9230cc20d598b18ebef8306b 100644 (file)
@@ -72,24 +72,23 @@ ENTRY(entry_SYSENTER_compat)
        pushfq                          /* pt_regs->flags (except IF = 0) */
        orl     $X86_EFLAGS_IF, (%rsp)  /* Fix saved flags */
        pushq   $__USER32_CS            /* pt_regs->cs */
-       xorq    %r8,%r8
-       pushq   %r8                     /* pt_regs->ip = 0 (placeholder) */
+       pushq   $0                      /* pt_regs->ip = 0 (placeholder) */
        pushq   %rax                    /* pt_regs->orig_ax */
        pushq   %rdi                    /* pt_regs->di */
        pushq   %rsi                    /* pt_regs->si */
        pushq   %rdx                    /* pt_regs->dx */
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
-       pushq   %r8                     /* pt_regs->r8  = 0 */
-       pushq   %r8                     /* pt_regs->r9  = 0 */
-       pushq   %r8                     /* pt_regs->r10 = 0 */
-       pushq   %r8                     /* pt_regs->r11 = 0 */
+       pushq   $0                      /* pt_regs->r8  = 0 */
+       pushq   $0                      /* pt_regs->r9  = 0 */
+       pushq   $0                      /* pt_regs->r10 = 0 */
+       pushq   $0                      /* pt_regs->r11 = 0 */
        pushq   %rbx                    /* pt_regs->rbx */
        pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
-       pushq   %r8                     /* pt_regs->r12 = 0 */
-       pushq   %r8                     /* pt_regs->r13 = 0 */
-       pushq   %r8                     /* pt_regs->r14 = 0 */
-       pushq   %r8                     /* pt_regs->r15 = 0 */
+       pushq   $0                      /* pt_regs->r12 = 0 */
+       pushq   $0                      /* pt_regs->r13 = 0 */
+       pushq   $0                      /* pt_regs->r14 = 0 */
+       pushq   $0                      /* pt_regs->r15 = 0 */
        cld
 
        /*
@@ -205,17 +204,16 @@ ENTRY(entry_SYSCALL_compat)
        pushq   %rdx                    /* pt_regs->dx */
        pushq   %rbp                    /* pt_regs->cx (stashed in bp) */
        pushq   $-ENOSYS                /* pt_regs->ax */
-       xorq    %r8,%r8
-       pushq   %r8                     /* pt_regs->r8  = 0 */
-       pushq   %r8                     /* pt_regs->r9  = 0 */
-       pushq   %r8                     /* pt_regs->r10 = 0 */
-       pushq   %r8                     /* pt_regs->r11 = 0 */
+       pushq   $0                      /* pt_regs->r8  = 0 */
+       pushq   $0                      /* pt_regs->r9  = 0 */
+       pushq   $0                      /* pt_regs->r10 = 0 */
+       pushq   $0                      /* pt_regs->r11 = 0 */
        pushq   %rbx                    /* pt_regs->rbx */
        pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
-       pushq   %r8                     /* pt_regs->r12 = 0 */
-       pushq   %r8                     /* pt_regs->r13 = 0 */
-       pushq   %r8                     /* pt_regs->r14 = 0 */
-       pushq   %r8                     /* pt_regs->r15 = 0 */
+       pushq   $0                      /* pt_regs->r12 = 0 */
+       pushq   $0                      /* pt_regs->r13 = 0 */
+       pushq   $0                      /* pt_regs->r14 = 0 */
+       pushq   $0                      /* pt_regs->r15 = 0 */
 
        /*
         * User mode is traced as though IRQs are on, and SYSENTER
@@ -316,11 +314,10 @@ ENTRY(entry_INT80_compat)
        pushq   %rdx                    /* pt_regs->dx */
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
-       xorq    %r8,%r8
-       pushq   %r8                     /* pt_regs->r8  = 0 */
-       pushq   %r8                     /* pt_regs->r9  = 0 */
-       pushq   %r8                     /* pt_regs->r10 = 0 */
-       pushq   %r8                     /* pt_regs->r11 = 0 */
+       pushq   $0                      /* pt_regs->r8  = 0 */
+       pushq   $0                      /* pt_regs->r9  = 0 */
+       pushq   $0                      /* pt_regs->r10 = 0 */
+       pushq   $0                      /* pt_regs->r11 = 0 */
        pushq   %rbx                    /* pt_regs->rbx */
        pushq   %rbp                    /* pt_regs->rbp */
        pushq   %r12                    /* pt_regs->r12 */
index cac6d17ce5db000ea008d63d5905acbd6fea21e0..555263e385c9210af5f70e08dd27871005c5a865 100644 (file)
 543    x32     io_setup                compat_sys_io_setup
 544    x32     io_submit               compat_sys_io_submit
 545    x32     execveat                compat_sys_execveat/ptregs
+534    x32     preadv2                 compat_sys_preadv2
+535    x32     pwritev2                compat_sys_pwritev2
index 03c3eb77bfcebce765b838271b6d7789365ff3de..2f02d23a05ef4b20c8a620c9bef4e7943e02ab1f 100644 (file)
@@ -13,7 +13,6 @@
 
 #include <uapi/linux/time.h>
 #include <asm/vgtod.h>
-#include <asm/hpet.h>
 #include <asm/vvar.h>
 #include <asm/unistd.h>
 #include <asm/msr.h>
@@ -28,16 +27,6 @@ extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
 extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
 extern time_t __vdso_time(time_t *t);
 
-#ifdef CONFIG_HPET_TIMER
-extern u8 hpet_page
-       __attribute__((visibility("hidden")));
-
-static notrace cycle_t vread_hpet(void)
-{
-       return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
-}
-#endif
-
 #ifdef CONFIG_PARAVIRT_CLOCK
 extern u8 pvclock_page
        __attribute__((visibility("hidden")));
@@ -195,10 +184,6 @@ notrace static inline u64 vgetsns(int *mode)
 
        if (gtod->vclock_mode == VCLOCK_TSC)
                cycles = vread_tsc();
-#ifdef CONFIG_HPET_TIMER
-       else if (gtod->vclock_mode == VCLOCK_HPET)
-               cycles = vread_hpet();
-#endif
 #ifdef CONFIG_PARAVIRT_CLOCK
        else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
                cycles = vread_pvclock(mode);
index 4158acc17df07c355f0d86e19fd3b4f9b43e590e..a708aa90b507fce048e05676f073f7b9f3347e59 100644 (file)
@@ -25,7 +25,7 @@ SECTIONS
         * segment.
         */
 
-       vvar_start = . - 3 * PAGE_SIZE;
+       vvar_start = . - 2 * PAGE_SIZE;
        vvar_page = vvar_start;
 
        /* Place all vvars at the offsets in asm/vvar.h. */
@@ -35,8 +35,7 @@ SECTIONS
 #undef __VVAR_KERNEL_LDS
 #undef EMIT_VVAR
 
-       hpet_page = vvar_start + PAGE_SIZE;
-       pvclock_page = vvar_start + 2 * PAGE_SIZE;
+       pvclock_page = vvar_start + PAGE_SIZE;
 
        . = SIZEOF_HEADERS;
 
index 10f704584922653fd208646cac11c4f8a9cd776b..b3cf81333a54edf146c26a747db201111b7863d3 100644 (file)
@@ -18,7 +18,6 @@
 #include <asm/vdso.h>
 #include <asm/vvar.h>
 #include <asm/page.h>
-#include <asm/hpet.h>
 #include <asm/desc.h>
 #include <asm/cpufeature.h>
 
@@ -129,16 +128,6 @@ static int vvar_fault(const struct vm_special_mapping *sm,
        if (sym_offset == image->sym_vvar_page) {
                ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
                                    __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
-       } else if (sym_offset == image->sym_hpet_page) {
-#ifdef CONFIG_HPET_TIMER
-               if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
-                       ret = vm_insert_pfn_prot(
-                               vma,
-                               (unsigned long)vmf->virtual_address,
-                               hpet_address >> PAGE_SHIFT,
-                               pgprot_noncached(PAGE_READONLY));
-               }
-#endif
        } else if (sym_offset == image->sym_pvclock_page) {
                struct pvclock_vsyscall_time_info *pvti =
                        pvclock_pvti_cpu0_va();
index b7080bef91376ea78010e65549116bc9e1d9a006..73a75aa5a66db39d69a6c4f60e091f32fd570c04 100644 (file)
@@ -1524,7 +1524,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 
 static void __init pmu_check_apic(void)
 {
-       if (cpu_has_apic)
+       if (boot_cpu_has(X86_FEATURE_APIC))
                return;
 
        x86_pmu.apic = 0;
index 17734a6ef474c8e9090181ec4ffbcee4f11e538f..16c1789164122b70f6d2cd4ad16ae18b741236aa 100644 (file)
@@ -1400,7 +1400,7 @@ static int __init intel_uncore_init(void)
        if (!id)
                return -ENODEV;
 
-       if (cpu_has_hypervisor)
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
                return -ENODEV;
 
        max_packages = topology_max_packages();
index 0552884da18db24c7910fa8166a1385f56b6d4a9..2f29f4e407c315114b6866e496b04e8d4ab51a1d 100644 (file)
@@ -357,7 +357,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
                put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
 
                /* Create the ucontext.  */
-               if (cpu_has_xsave)
+               if (boot_cpu_has(X86_FEATURE_XSAVE))
                        put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
                else
                        put_user_ex(0, &frame->uc.uc_flags);
index 99afb665a004cb8ea82c1751f6ef29b12d116cc6..e77a6443104ff1d2b3162e17b1958b753c95be39 100644 (file)
@@ -1,11 +1,12 @@
 #ifndef _ASM_X86_ALTERNATIVE_H
 #define _ASM_X86_ALTERNATIVE_H
 
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 #include <asm/asm.h>
-#include <asm/ptrace.h>
 
 /*
  * Alternative inline assembly for SMP.
@@ -233,36 +234,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
  */
 #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
 
-struct paravirt_patch_site;
-#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch_site *start,
-                   struct paravirt_patch_site *end);
-#else
-static inline void apply_paravirt(struct paravirt_patch_site *start,
-                                 struct paravirt_patch_site *end)
-{}
-#define __parainstructions     NULL
-#define __parainstructions_end NULL
-#endif
-
-extern void *text_poke_early(void *addr, const void *opcode, size_t len);
-
-/*
- * Clear and restore the kernel write-protection flag on the local CPU.
- * Allows the kernel to edit read-only pages.
- * Side-effect: any interrupt handler running between save and restore will have
- * the ability to write to read-only pages.
- *
- * Warning:
- * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and
- * no thread can be preempted in the instructions being modified (no iret to an
- * invalid instruction possible) or if the instructions are changed from a
- * consistent state to another consistent state atomically.
- * On the local CPU you need to be protected again NMI or MCE handlers seeing an
- * inconsistent instruction while you patch.
- */
-extern void *text_poke(void *addr, const void *opcode, size_t len);
-extern int poke_int3_handler(struct pt_regs *regs);
-extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+#endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_ALTERNATIVE_H */
index 98f25bbafac4c52c10e87dc8cd85e6e3914fa4d6..bc27611fa58f1b0b522beefb3117afb1f5c250e6 100644 (file)
@@ -239,10 +239,10 @@ extern void __init check_x2apic(void);
 extern void x2apic_setup(void);
 static inline int x2apic_enabled(void)
 {
-       return cpu_has_x2apic && apic_is_x2apic_enabled();
+       return boot_cpu_has(X86_FEATURE_X2APIC) && apic_is_x2apic_enabled();
 }
 
-#define x2apic_supported()     (cpu_has_x2apic)
+#define x2apic_supported()     (boot_cpu_has(X86_FEATURE_X2APIC))
 #else /* !CONFIG_X86_X2APIC */
 static inline void check_x2apic(void) { }
 static inline void x2apic_setup(void) { }
index d194266acb28e52d237c19c21291633c15d99c9e..eae33c7170c868993ffc3b861ca30ddc14285aa7 100644 (file)
@@ -3,11 +3,10 @@
 #ifndef _ASM_X86_CLOCKSOURCE_H
 #define _ASM_X86_CLOCKSOURCE_H
 
-#define VCLOCK_NONE    0  /* No vDSO clock available.  */
-#define VCLOCK_TSC     1  /* vDSO should use vread_tsc.        */
-#define VCLOCK_HPET    2  /* vDSO should use vread_hpet.       */
-#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
-#define VCLOCK_MAX     3
+#define VCLOCK_NONE    0       /* No vDSO clock available.             */
+#define VCLOCK_TSC     1       /* vDSO should use vread_tsc.           */
+#define VCLOCK_PVCLOCK 2       /* vDSO should use vread_pvclock.       */
+#define VCLOCK_MAX     2
 
 struct arch_clocksource_data {
        int vclock_mode;
index ebb102e1bbc7ad84cff580de23380becbe900ed1..5a3b2c119ed0eb70137bff968607f5d44481f223 100644 (file)
@@ -307,7 +307,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
        return (void __user *)round_down(sp - len, 16);
 }
 
-static inline bool is_x32_task(void)
+static inline bool in_x32_syscall(void)
 {
 #ifdef CONFIG_X86_X32_ABI
        if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)
@@ -318,7 +318,7 @@ static inline bool is_x32_task(void)
 
 static inline bool in_compat_syscall(void)
 {
-       return is_ia32_task() || is_x32_task();
+       return in_ia32_syscall() || in_x32_syscall();
 }
 #define in_compat_syscall in_compat_syscall    /* override the generic impl */
 
index 53ac9bbf20646ba13d4887bbe53c8f8f684ad499..25ebb54905e0001801fc45ea86d6f8a111f011e2 100644 (file)
@@ -119,31 +119,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
        set_bit(bit, (unsigned long *)cpu_caps_set);    \
 } while (0)
 
-#define cpu_has_fpu            boot_cpu_has(X86_FEATURE_FPU)
-#define cpu_has_pse            boot_cpu_has(X86_FEATURE_PSE)
-#define cpu_has_tsc            boot_cpu_has(X86_FEATURE_TSC)
-#define cpu_has_pge            boot_cpu_has(X86_FEATURE_PGE)
-#define cpu_has_apic           boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_fxsr           boot_cpu_has(X86_FEATURE_FXSR)
-#define cpu_has_xmm            boot_cpu_has(X86_FEATURE_XMM)
-#define cpu_has_xmm2           boot_cpu_has(X86_FEATURE_XMM2)
-#define cpu_has_aes            boot_cpu_has(X86_FEATURE_AES)
-#define cpu_has_avx            boot_cpu_has(X86_FEATURE_AVX)
-#define cpu_has_avx2           boot_cpu_has(X86_FEATURE_AVX2)
-#define cpu_has_clflush                boot_cpu_has(X86_FEATURE_CLFLUSH)
-#define cpu_has_gbpages                boot_cpu_has(X86_FEATURE_GBPAGES)
-#define cpu_has_arch_perfmon   boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
-#define cpu_has_pat            boot_cpu_has(X86_FEATURE_PAT)
-#define cpu_has_x2apic         boot_cpu_has(X86_FEATURE_X2APIC)
-#define cpu_has_xsave          boot_cpu_has(X86_FEATURE_XSAVE)
-#define cpu_has_xsaves         boot_cpu_has(X86_FEATURE_XSAVES)
-#define cpu_has_osxsave                boot_cpu_has(X86_FEATURE_OSXSAVE)
-#define cpu_has_hypervisor     boot_cpu_has(X86_FEATURE_HYPERVISOR)
-/*
- * Do not add any more of those clumsy macros - use static_cpu_has() for
- * fast paths and boot_cpu_has() otherwise!
- */
-
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
index 0aee9dd1976ed5d1871f2a01b9981376fbef3d8e..4a413485f9eb8ef58ec71c77ff2594f4300c8ea6 100644 (file)
 #define X86_BUG_FXSAVE_LEAK    X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR        X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
 #define X86_BUG_SYSRET_SS_ATTRS        X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_NULL_SEG       X86_BUG(9) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE   X86_BUG(10) /* SWAPGS without input dep on GS */
+
 
 #ifdef CONFIG_X86_32
 /*
index 15340e36ddcb3364e16eb63cd61c61a42676d756..fea7724141a04be703c86032db79cc29d37148ca 100644 (file)
@@ -176,7 +176,7 @@ static inline void elf_common_init(struct thread_struct *t,
        regs->si = regs->di = regs->bp = 0;
        regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
        regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
-       t->fs = t->gs = 0;
+       t->fsbase = t->gsbase = 0;
        t->fsindex = t->gsindex = 0;
        t->ds = t->es = ds;
 }
@@ -226,8 +226,8 @@ do {                                                                \
        (pr_reg)[18] = (regs)->flags;                           \
        (pr_reg)[19] = (regs)->sp;                              \
        (pr_reg)[20] = (regs)->ss;                              \
-       (pr_reg)[21] = current->thread.fs;                      \
-       (pr_reg)[22] = current->thread.gs;                      \
+       (pr_reg)[21] = current->thread.fsbase;                  \
+       (pr_reg)[22] = current->thread.gsbase;                  \
        asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v;       \
        asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v;       \
        asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v;       \
index e6a8613fbfb0ea19f8d507c4ce6ae1f0d4a35be6..3a106165e03ad035484df5fb97989265798263df 100644 (file)
@@ -4,7 +4,7 @@
 #include <asm/page.h>
 #include <asm-generic/hugetlb.h>
 
-#define hugepages_supported() cpu_has_pse
+#define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE)
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
                                         unsigned long addr,
index d0afb05c84fc1ffd434e85f6267326b9091d2595..f70604125286b90d820bc14be0b65ecccb4f7a44 100644 (file)
@@ -5,7 +5,7 @@
 
 static inline bool arch_irq_work_has_interrupt(void)
 {
-       return cpu_has_apic;
+       return boot_cpu_has(X86_FEATURE_APIC);
 }
 
 #endif /* _ASM_IRQ_WORK_H */
index 332f98c9111f41d92def3e02f0771783c8dce100..22a8537eb780b231f24cdec6b2b1797cd7cd5868 100644 (file)
@@ -6,6 +6,8 @@
  * Copyright (C) 2008 Wind River Systems, Inc.
  */
 
+#include <asm/ptrace.h>
+
 /*
  * BUFMAX defines the maximum number of characters in inbound/outbound
  * buffers at least NUMREGBYTES*2 are needed for register packets
index 79327e9483a34ec33c3ba71bbdc94397f2456f46..0ccb26dda126da6d5876c84264c290d1fa203d58 100644 (file)
@@ -8,40 +8,6 @@
 
 #ifdef CONFIG_X86_32
 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
-
-/*
- * Make sure the compiler doesn't do anything stupid with the
- * arguments on the stack - they are owned by the *caller*, not
- * the callee. This just fools gcc into not spilling into them,
- * and keeps it from doing tailcall recursion and/or using the
- * stack slots for temporaries, since they are live and "used"
- * all the way to the end of the function.
- *
- * NOTE! On x86-64, all the arguments are in registers, so this
- * only matters on a 32-bit kernel.
- */
-#define asmlinkage_protect(n, ret, args...) \
-       __asmlinkage_protect##n(ret, ##args)
-#define __asmlinkage_protect_n(ret, args...) \
-       __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args)
-#define __asmlinkage_protect0(ret) \
-       __asmlinkage_protect_n(ret)
-#define __asmlinkage_protect1(ret, arg1) \
-       __asmlinkage_protect_n(ret, "m" (arg1))
-#define __asmlinkage_protect2(ret, arg1, arg2) \
-       __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
-#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
-       __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
-#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
-       __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
-                             "m" (arg4))
-#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
-       __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
-                             "m" (arg4), "m" (arg5))
-#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
-       __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
-                             "m" (arg4), "m" (arg5), "m" (arg6))
-
 #endif /* CONFIG_X86_32 */
 
 #ifdef __ASSEMBLY__
index 7a79ee2778b3b5067fa816e541bf93bc26cf782d..7dc1d8fef7fdec6a633f5cd3f7ada4d2ba09e8fe 100644 (file)
@@ -84,7 +84,10 @@ static inline unsigned long long native_read_msr(unsigned int msr)
 {
        DECLARE_ARGS(val, low, high);
 
-       asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
+       asm volatile("1: rdmsr\n"
+                    "2:\n"
+                    _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
+                    : EAX_EDX_RET(val, low, high) : "c" (msr));
        if (msr_tracepoint_active(__tracepoint_read_msr))
                do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0);
        return EAX_EDX_VAL(val, low, high);
@@ -98,7 +101,10 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
        asm volatile("2: rdmsr ; xor %[err],%[err]\n"
                     "1:\n\t"
                     ".section .fixup,\"ax\"\n\t"
-                    "3:  mov %[fault],%[err] ; jmp 1b\n\t"
+                    "3: mov %[fault],%[err]\n\t"
+                    "xorl %%eax, %%eax\n\t"
+                    "xorl %%edx, %%edx\n\t"
+                    "jmp 1b\n\t"
                     ".previous\n\t"
                     _ASM_EXTABLE(2b, 3b)
                     : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
@@ -108,10 +114,14 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
        return EAX_EDX_VAL(val, low, high);
 }
 
-static inline void native_write_msr(unsigned int msr,
-                                   unsigned low, unsigned high)
+/* Can be uninlined because referenced by paravirt */
+notrace static inline void native_write_msr(unsigned int msr,
+                                           unsigned low, unsigned high)
 {
-       asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+       asm volatile("1: wrmsr\n"
+                    "2:\n"
+                    _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
+                    : : "c" (msr), "a"(low), "d" (high) : "memory");
-       if (msr_tracepoint_active(__tracepoint_read_msr))
+       if (msr_tracepoint_active(__tracepoint_write_msr))
                do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
 }
index b94f6f64e23d0cf7e630c190fe48518b47e819ed..dbff1456d2152a6993ba5f381f6a7ef838b52f28 100644 (file)
@@ -24,6 +24,7 @@
 #define _ASM_X86_MTRR_H
 
 #include <uapi/asm/mtrr.h>
+#include <asm/pat.h>
 
 
 /*
@@ -83,9 +84,12 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
 static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
 {
 }
+static inline void mtrr_bp_init(void)
+{
+       pat_disable("MTRRs disabled, skipping PAT initialization too.");
+}
 
 #define mtrr_ap_init() do {} while (0)
-#define mtrr_bp_init() do {} while (0)
 #define set_mtrr_aps_delayed_init() do {} while (0)
 #define mtrr_aps_init() do {} while (0)
 #define mtrr_bp_restore() do {} while (0)
index 601f1b8f9961af35f113ad456b1ab90f1aceaced..3c731413f1dee40a1d07202c524f31aa1d01dea9 100644 (file)
@@ -130,21 +130,31 @@ static inline void wbinvd(void)
 
 #define get_kernel_rpl()  (pv_info.kernel_rpl)
 
-static inline u64 paravirt_read_msr(unsigned msr, int *err)
+static inline u64 paravirt_read_msr(unsigned msr)
 {
-       return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
+       return PVOP_CALL1(u64, pv_cpu_ops.read_msr, msr);
 }
 
-static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
+static inline void paravirt_write_msr(unsigned msr,
+                                     unsigned low, unsigned high)
 {
-       return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
+       PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high);
+}
+
+static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
+{
+       return PVOP_CALL2(u64, pv_cpu_ops.read_msr_safe, msr, err);
+}
+
+static inline int paravirt_write_msr_safe(unsigned msr,
+                                         unsigned low, unsigned high)
+{
+       return PVOP_CALL3(int, pv_cpu_ops.write_msr_safe, msr, low, high);
 }
 
-/* These should all do BUG_ON(_err), but our headers are too tangled. */
 #define rdmsr(msr, val1, val2)                 \
 do {                                           \
-       int _err;                               \
-       u64 _l = paravirt_read_msr(msr, &_err); \
+       u64 _l = paravirt_read_msr(msr);        \
        val1 = (u32)_l;                         \
        val2 = _l >> 32;                        \
 } while (0)
@@ -156,8 +166,7 @@ do {                                                \
 
 #define rdmsrl(msr, val)                       \
 do {                                           \
-       int _err;                               \
-       val = paravirt_read_msr(msr, &_err);    \
+       val = paravirt_read_msr(msr);           \
 } while (0)
 
 static inline void wrmsrl(unsigned msr, u64 val)
@@ -165,23 +174,23 @@ static inline void wrmsrl(unsigned msr, u64 val)
        wrmsr(msr, (u32)val, (u32)(val>>32));
 }
 
-#define wrmsr_safe(msr, a, b)  paravirt_write_msr(msr, a, b)
+#define wrmsr_safe(msr, a, b)  paravirt_write_msr_safe(msr, a, b)
 
 /* rdmsr with exception handling */
-#define rdmsr_safe(msr, a, b)                  \
-({                                             \
-       int _err;                               \
-       u64 _l = paravirt_read_msr(msr, &_err); \
-       (*a) = (u32)_l;                         \
-       (*b) = _l >> 32;                        \
-       _err;                                   \
+#define rdmsr_safe(msr, a, b)                          \
+({                                                     \
+       int _err;                                       \
+       u64 _l = paravirt_read_msr_safe(msr, &_err);    \
+       (*a) = (u32)_l;                                 \
+       (*b) = _l >> 32;                                \
+       _err;                                           \
 })
 
 static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
 {
        int err;
 
-       *p = paravirt_read_msr(msr, &err);
+       *p = paravirt_read_msr_safe(msr, &err);
        return err;
 }
 
index e8c2326478c8fabc4e03c04534c599072e2598fe..b4a23eafa1b95e2e0d63f42a54c8725732e99df8 100644 (file)
@@ -155,10 +155,16 @@ struct pv_cpu_ops {
        void (*cpuid)(unsigned int *eax, unsigned int *ebx,
                      unsigned int *ecx, unsigned int *edx);
 
-       /* MSR, PMC and TSR operations.
-          err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
-       u64 (*read_msr)(unsigned int msr, int *err);
-       int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+       /* Unsafe MSR operations.  These will warn or panic on failure. */
+       u64 (*read_msr)(unsigned int msr);
+       void (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+
+       /*
+        * Safe MSR operations.
+        * read sets err to 0 or -EIO.  write returns 0 or -EIO.
+        */
+       u64 (*read_msr_safe)(unsigned int msr, int *err);
+       int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high);
 
        u64 (*read_pmc)(int counter);
 
index ca6c228d5e62837be88984b652bb436949295d03..0b1ff4c1c14e782c0375027ce99cab09e96a04fb 100644 (file)
@@ -5,8 +5,8 @@
 #include <asm/pgtable_types.h>
 
 bool pat_enabled(void);
+void pat_disable(const char *reason);
 extern void pat_init(void);
-void pat_init_cache_modes(u64);
 
 extern int reserve_memtype(u64 start, u64 end,
                enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
index 97f3242e133ccc9c2866baade8ca4f82ade65f04..f86491a7bc9dd1c8c96f52f28b9befd0ff59d6ea 100644 (file)
@@ -183,7 +183,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
 
 static inline int has_transparent_hugepage(void)
 {
-       return cpu_has_pse;
+       return boot_cpu_has(X86_FEATURE_PSE);
 }
 
 #ifdef __HAVE_ARCH_PTE_DEVMAP
index 9264476f3d578e8fa346411aad4900e85afcb41a..9251aa9627216c3a58dbd46efd1b1ca9298d6331 100644 (file)
@@ -388,9 +388,16 @@ struct thread_struct {
        unsigned long           ip;
 #endif
 #ifdef CONFIG_X86_64
-       unsigned long           fs;
+       unsigned long           fsbase;
+       unsigned long           gsbase;
+#else
+       /*
+        * XXX: this could presumably be unsigned short.  Alternatively,
+        * 32-bit kernels could be taught to use fsindex instead.
+        */
+       unsigned long fs;
+       unsigned long gs;
 #endif
-       unsigned long           gs;
 
        /* Save middle states of ptrace breakpoints */
        struct perf_event       *ptrace_bps[HBP_NUM];
index 7d5a1929d76b31bba69295e533e460ed50904cfd..1549caa098f0828e6c533f714f22a7f97532427e 100644 (file)
@@ -2,6 +2,7 @@
 #define _ASM_X86_SEGMENT_H
 
 #include <linux/const.h>
+#include <asm/alternative.h>
 
 /*
  * Constructor for a conventional segment GDT (or LDT) entry.
 #define __USER_CS                      (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
 #define __PER_CPU_SEG                  (GDT_ENTRY_PER_CPU*8 + 3)
 
-/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
-#define FS_TLS                         0
-#define GS_TLS                         1
-
-#define GS_TLS_SEL                     ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL                     ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
 #endif
 
 #ifndef CONFIG_PARAVIRT
@@ -249,10 +243,13 @@ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDL
 #endif
 
 /*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
+ * Load a segment. Fall back on loading the zero segment if something goes
+ * wrong.  This variant assumes that loading zero fully clears the segment.
+ * This is always the case on Intel CPUs and, even on 64-bit AMD CPUs, any
+ * failure to fully clear the cached descriptor is only observable for
+ * FS and GS.
  */
-#define loadsegment(seg, value)                                                \
+#define __loadsegment_simple(seg, value)                               \
 do {                                                                   \
        unsigned short __val = (value);                                 \
                                                                        \
@@ -269,6 +266,38 @@ do {                                                                       \
                     : "+r" (__val) : : "memory");                      \
 } while (0)
 
+#define __loadsegment_ss(value) __loadsegment_simple(ss, (value))
+#define __loadsegment_ds(value) __loadsegment_simple(ds, (value))
+#define __loadsegment_es(value) __loadsegment_simple(es, (value))
+
+#ifdef CONFIG_X86_32
+
+/*
+ * On 32-bit systems, the hidden parts of FS and GS are unobservable if
+ * the selector is NULL, so there's no funny business here.
+ */
+#define __loadsegment_fs(value) __loadsegment_simple(fs, (value))
+#define __loadsegment_gs(value) __loadsegment_simple(gs, (value))
+
+#else
+
+static inline void __loadsegment_fs(unsigned short value)
+{
+       asm volatile("                                          \n"
+                    "1:        movw %0, %%fs                   \n"
+                    "2:                                        \n"
+
+                    _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs)
+
+                    : : "rm" (value) : "memory");
+}
+
+/* __loadsegment_gs is intentionally undefined.  Use load_gs_index instead. */
+
+#endif
+
+#define loadsegment(seg, value) __loadsegment_ ## seg (value)
+
 /*
  * Save a segment register away:
  */
index 11af24e09c8a667911bf9f0b4a3e17b7c75acf16..ac1d5da1473429b930ca38dc68a7875221d0c589 100644 (file)
@@ -6,6 +6,7 @@
 #define COMMAND_LINE_SIZE 2048
 
 #include <linux/linkage.h>
+#include <asm/page_types.h>
 
 #ifdef __i386__
 
index 751bf4b7bf114da12231a56f4217c2583ddeafb2..8f321a1b03a1aaa0e87c4c1182d2b2f282efa1e4 100644 (file)
@@ -39,8 +39,7 @@ do {                                                                  \
         */                                                             \
        unsigned long ebx, ecx, edx, esi, edi;                          \
                                                                        \
-       asm volatile("pushfl\n\t"               /* save    flags */     \
-                    "pushl %%ebp\n\t"          /* save    EBP   */     \
+       asm volatile("pushl %%ebp\n\t"          /* save    EBP   */     \
                     "movl %%esp,%[prev_sp]\n\t"        /* save    ESP   */ \
                     "movl %[next_sp],%%esp\n\t"        /* restore ESP   */ \
                     "movl $1f,%[prev_ip]\n\t"  /* save    EIP   */     \
@@ -49,7 +48,6 @@ do {                                                                  \
                     "jmp __switch_to\n"        /* regparm call  */     \
                     "1:\t"                                             \
                     "popl %%ebp\n\t"           /* restore EBP   */     \
-                    "popfl\n"                  /* restore flags */     \
                                                                        \
                     /* output parameters */                            \
                     : [prev_sp] "=m" (prev->thread.sp),                \
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
new file mode 100644 (file)
index 0000000..9039506
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _ASM_X86_TEXT_PATCHING_H
+#define _ASM_X86_TEXT_PATCHING_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <asm/ptrace.h>
+
+struct paravirt_patch_site;
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch_site *start,
+                   struct paravirt_patch_site *end);
+#else
+static inline void apply_paravirt(struct paravirt_patch_site *start,
+                                 struct paravirt_patch_site *end)
+{}
+#define __parainstructions     NULL
+#define __parainstructions_end NULL
+#endif
+
+extern void *text_poke_early(void *addr, const void *opcode, size_t len);
+
+/*
+ * Clear and restore the kernel write-protection flag on the local CPU.
+ * Allows the kernel to edit read-only pages.
+ * Side-effect: any interrupt handler running between save and restore will have
+ * the ability to write to read-only pages.
+ *
+ * Warning:
+ * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and
+ * no thread can be preempted in the instructions being modified (no iret to an
+ * invalid instruction possible) or if the instructions are changed from a
+ * consistent state to another consistent state atomically.
+ * On the local CPU you need to be protected against NMI or MCE handlers seeing an
+ * inconsistent instruction while you patch.
+ */
+extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern int poke_int3_handler(struct pt_regs *regs);
+extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+
+#endif /* _ASM_X86_TEXT_PATCHING_H */
index ffae84df8a9313cd3cc7b12953c0c24a809cd792..30c133ac05cd86d6ddba68c8993160296cafde04 100644 (file)
@@ -255,7 +255,7 @@ static inline bool test_and_clear_restore_sigmask(void)
        return true;
 }
 
-static inline bool is_ia32_task(void)
+static inline bool in_ia32_syscall(void)
 {
 #ifdef CONFIG_X86_32
        return true;
index 1fde8d580a5bac371c7a56cf60e688c49d2299f6..4e5be94e079a6c64353bd327c9fe4ef9796e16b1 100644 (file)
@@ -181,7 +181,7 @@ static inline void __native_flush_tlb_single(unsigned long addr)
 
 static inline void __flush_tlb_all(void)
 {
-       if (cpu_has_pge)
+       if (static_cpu_has(X86_FEATURE_PGE))
                __flush_tlb_global();
        else
                __flush_tlb();
index 174c4212780afde12200fd23a1dbc17a08f40309..7428697c5b8df1d6856ca63fdda62cb047083f28 100644 (file)
@@ -22,7 +22,7 @@ extern void disable_TSC(void);
 static inline cycles_t get_cycles(void)
 {
 #ifndef CONFIG_X86_TSC
-       if (!cpu_has_tsc)
+       if (!boot_cpu_has(X86_FEATURE_TSC))
                return 0;
 #endif
 
index 2e7513d1f1f45eb3bb7ec4ebe459aec2fa76b46d..12f9653bde8d968c3374e6f13a2fbc2abacccda0 100644 (file)
@@ -118,7 +118,7 @@ struct exception_table_entry {
 
 extern int fixup_exception(struct pt_regs *regs, int trapnr);
 extern bool ex_has_fault_handler(unsigned long ip);
-extern int early_fixup_exception(unsigned long *ip);
+extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
 
 /*
  * These are the main single-value transfer routines.  They automatically
index c54beb44c4c1f20e4dda33fb901d17b2850fecf5..635eac54392293c6e3250a91aedfe30dee37a976 100644 (file)
@@ -550,7 +550,7 @@ static struct xor_block_template xor_block_pIII_sse = {
 #define XOR_TRY_TEMPLATES                              \
 do {                                                   \
        AVX_XOR_SPEED;                                  \
-       if (cpu_has_xmm) {                              \
+       if (boot_cpu_has(X86_FEATURE_XMM)) {                            \
                xor_speed(&xor_block_pIII_sse);         \
                xor_speed(&xor_block_sse_pf64);         \
        } else if (boot_cpu_has(X86_FEATURE_MMX)) {     \
index 7c0a517ec7511a667166c216df8357087ff3e7b0..22a7b1870a31801be3a28513f6f595fb483b36e1 100644 (file)
@@ -167,12 +167,12 @@ static struct xor_block_template xor_block_avx = {
 
 #define AVX_XOR_SPEED \
 do { \
-       if (cpu_has_avx && cpu_has_osxsave) \
+       if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
                xor_speed(&xor_block_avx); \
 } while (0)
 
 #define AVX_SELECT(FASTEST) \
-       (cpu_has_avx && cpu_has_osxsave ? &xor_block_avx : FASTEST)
+       (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)
 
 #else
 
index 8c2f1ef6ca236ea16b9ae25a390b32d33ef566d4..2522e564269e240604dc4af7cf5a20fc8c86e175 100644 (file)
@@ -136,7 +136,7 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
 {
        struct acpi_table_madt *madt = NULL;
 
-       if (!cpu_has_apic)
+       if (!boot_cpu_has(X86_FEATURE_APIC))
                return -EINVAL;
 
        madt = (struct acpi_table_madt *)table;
@@ -951,7 +951,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 {
        int count;
 
-       if (!cpu_has_apic)
+       if (!boot_cpu_has(X86_FEATURE_APIC))
                return -ENODEV;
 
        /*
@@ -979,7 +979,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
        int ret;
        struct acpi_subtable_proc madt_proc[2];
 
-       if (!cpu_has_apic)
+       if (!boot_cpu_has(X86_FEATURE_APIC))
                return -ENODEV;
 
        /*
@@ -1125,7 +1125,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
        if (acpi_disabled || acpi_noirq)
                return -ENODEV;
 
-       if (!cpu_has_apic)
+       if (!boot_cpu_has(X86_FEATURE_APIC))
                return -ENODEV;
 
        /*
index 25f909362b7a89c42f32c8ebe74fb91239df3ca8..5cb272a7a5a32eccbbee61a23c50ab9b5a80a0fb 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/stop_machine.h>
 #include <linux/slab.h>
 #include <linux/kdebug.h>
+#include <asm/text-patching.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
index d356987a04e97ba6537b2895d45114851e7b705b..60078a67d7e36064e667abfd679ed569db3a8173 100644 (file)
@@ -607,7 +607,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
        long tapic = apic_read(APIC_TMCCT);
        unsigned long pm = acpi_pm_read_early();
 
-       if (cpu_has_tsc)
+       if (boot_cpu_has(X86_FEATURE_TSC))
                tsc = rdtsc();
 
        switch (lapic_cal_loops++) {
@@ -668,7 +668,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
        *delta = (long)res;
 
        /* Correct the tsc counter value */
-       if (cpu_has_tsc) {
+       if (boot_cpu_has(X86_FEATURE_TSC)) {
                res = (((u64)(*deltatsc)) * pm_100ms);
                do_div(res, deltapm);
                apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
@@ -760,7 +760,7 @@ static int __init calibrate_APIC_clock(void)
        apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
                    lapic_timer_frequency);
 
-       if (cpu_has_tsc) {
+       if (boot_cpu_has(X86_FEATURE_TSC)) {
                apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
                            "%ld.%04ld MHz.\n",
                            (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
@@ -1085,7 +1085,7 @@ void lapic_shutdown(void)
 {
        unsigned long flags;
 
-       if (!cpu_has_apic && !apic_from_smp_config())
+       if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
                return;
 
        local_irq_save(flags);
@@ -1134,7 +1134,7 @@ void __init init_bsp_APIC(void)
         * Don't do the setup now if we have a SMP BIOS as the
         * through-I/O-APIC virtual wire mode might be active.
         */
-       if (smp_found_config || !cpu_has_apic)
+       if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
                return;
 
        /*
@@ -1227,7 +1227,7 @@ void setup_local_APIC(void)
        unsigned long long tsc = 0, ntsc;
        long long max_loops = cpu_khz ? cpu_khz : 1000000;
 
-       if (cpu_has_tsc)
+       if (boot_cpu_has(X86_FEATURE_TSC))
                tsc = rdtsc();
 
        if (disable_apic) {
@@ -1311,7 +1311,7 @@ void setup_local_APIC(void)
                        break;
                }
                if (queued) {
-                       if (cpu_has_tsc && cpu_khz) {
+                       if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
                                ntsc = rdtsc();
                                max_loops = (cpu_khz << 10) - (ntsc - tsc);
                        } else
@@ -1445,7 +1445,7 @@ static void __x2apic_disable(void)
 {
        u64 msr;
 
-       if (!cpu_has_apic)
+       if (!boot_cpu_has(X86_FEATURE_APIC))
                return;
 
        rdmsrl(MSR_IA32_APICBASE, msr);
@@ -1561,7 +1561,7 @@ void __init check_x2apic(void)
                pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
                x2apic_mode = 1;
                x2apic_state = X2APIC_ON;
-       } else if (!cpu_has_x2apic) {
+       } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
                x2apic_state = X2APIC_DISABLED;
        }
 }
@@ -1632,7 +1632,7 @@ void __init enable_IR_x2apic(void)
  */
 static int __init detect_init_APIC(void)
 {
-       if (!cpu_has_apic) {
+       if (!boot_cpu_has(X86_FEATURE_APIC)) {
                pr_info("No local APIC present\n");
                return -1;
        }
@@ -1711,14 +1711,14 @@ static int __init detect_init_APIC(void)
                goto no_apic;
        case X86_VENDOR_INTEL:
                if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
-                   (boot_cpu_data.x86 == 5 && cpu_has_apic))
+                   (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
                        break;
                goto no_apic;
        default:
                goto no_apic;
        }
 
-       if (!cpu_has_apic) {
+       if (!boot_cpu_has(X86_FEATURE_APIC)) {
                /*
                 * Over-ride BIOS and try to enable the local APIC only if
                 * "lapic" specified.
@@ -2233,19 +2233,19 @@ int __init APIC_init_uniprocessor(void)
                return -1;
        }
 #ifdef CONFIG_X86_64
-       if (!cpu_has_apic) {
+       if (!boot_cpu_has(X86_FEATURE_APIC)) {
                disable_apic = 1;
                pr_info("Apic disabled by BIOS\n");
                return -1;
        }
 #else
-       if (!smp_found_config && !cpu_has_apic)
+       if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
                return -1;
 
        /*
         * Complain if the BIOS pretends there is one.
         */
-       if (!cpu_has_apic &&
+       if (!boot_cpu_has(X86_FEATURE_APIC) &&
            APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
                pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
                        boot_cpu_physical_apicid);
@@ -2426,7 +2426,7 @@ static void apic_pm_activate(void)
 static int __init init_lapic_sysfs(void)
 {
        /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-       if (cpu_has_apic)
+       if (boot_cpu_has(X86_FEATURE_APIC))
                register_syscore_ops(&lapic_syscore_ops);
 
        return 0;
index 331a7a07c48fefe0313f3089c1bb497dc48cd7e4..13d19ed585142eae225625bd808c1c1acba0b790 100644 (file)
@@ -100,13 +100,13 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
 
 static u32 noop_apic_read(u32 reg)
 {
-       WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+       WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
        return 0;
 }
 
 static void noop_apic_write(u32 reg, u32 v)
 {
-       WARN_ON_ONCE(cpu_has_apic && !disable_apic);
+       WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
 }
 
 struct apic apic_noop = {
index fdb0fbfb1197a4cf4e485c4f330b3a399b3a6d5f..84e33ff5a6d595693c3718477f168f20d5546316 100644 (file)
@@ -1454,7 +1454,7 @@ void native_disable_io_apic(void)
                ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
        }
 
-       if (cpu_has_apic || apic_from_smp_config())
+       if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
                disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
index 28bde88b0085d2284947016aae114574958428ed..2a0f225afebd5925d66decbba2060b2781837b39 100644 (file)
@@ -230,7 +230,7 @@ int safe_smp_processor_id(void)
 {
        int apicid, cpuid;
 
-       if (!cpu_has_apic)
+       if (!boot_cpu_has(X86_FEATURE_APIC))
                return 0;
 
        apicid = hard_smp_processor_id();
index ef495511f019f0a899325d6a28ed5da98e1c939e..a5e400afc5632d225f4a197ec9121c8019f5a9a8 100644 (file)
@@ -944,7 +944,7 @@ static int __init print_ICs(void)
        print_PIC();
 
        /* don't print out if apic is not there */
-       if (!cpu_has_apic && !apic_from_smp_config())
+       if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
                return 0;
 
        print_local_APICs(show_lapic);
index 7b76eb67a9b3dcb84bb8e6cd6e40945924d32938..c343a54bed396d2f924d4ed6d8e5ec2188c636b0 100644 (file)
@@ -565,14 +565,17 @@ static void early_init_amd(struct cpuinfo_x86 *c)
         * can safely set X86_FEATURE_EXTD_APICID unconditionally for families
         * after 16h.
         */
-       if (cpu_has_apic && c->x86 > 0x16) {
-               set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
-       } else if (cpu_has_apic && c->x86 >= 0xf) {
-               /* check CPU config space for extended APIC ID */
-               unsigned int val;
-               val = read_pci_config(0, 24, 0, 0x68);
-               if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
+       if (boot_cpu_has(X86_FEATURE_APIC)) {
+               if (c->x86 > 0x16)
                        set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+               else if (c->x86 >= 0xf) {
+                       /* check CPU config space for extended APIC ID */
+                       unsigned int val;
+
+                       val = read_pci_config(0, 24, 0, 0x68);
+                       if ((val >> 17 & 0x3) == 0x3)
+                               set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+               }
        }
 #endif
 
@@ -628,6 +631,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
         */
        msr_set_bit(MSR_K7_HWCR, 6);
 #endif
+       set_cpu_bug(c, X86_BUG_SWAPGS_FENCE);
 }
 
 static void init_amd_gh(struct cpuinfo_x86 *c)
@@ -746,7 +750,7 @@ static void init_amd(struct cpuinfo_x86 *c)
        if (c->x86 >= 0xf)
                set_cpu_cap(c, X86_FEATURE_K8);
 
-       if (cpu_has_xmm2) {
+       if (cpu_has(c, X86_FEATURE_XMM2)) {
                /* MFENCE stops RDTSC speculation */
                set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
        }
index dbc6f066e2313ecb6276fe4462b8f349bb3100a6..6ef6ed9ccca6954891e2ea419990a89706d64523 100644 (file)
@@ -430,7 +430,7 @@ void load_percpu_segment(int cpu)
 #ifdef CONFIG_X86_32
        loadsegment(fs, __KERNEL_PERCPU);
 #else
-       loadsegment(gs, 0);
+       __loadsegment_simple(gs, 0);
        wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
 #endif
        load_stack_canary_segment();
@@ -866,30 +866,34 @@ static void detect_nopl(struct cpuinfo_x86 *c)
 #else
        set_cpu_cap(c, X86_FEATURE_NOPL);
 #endif
+}
 
+static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
        /*
-        * ESPFIX is a strange bug.  All real CPUs have it.  Paravirt
-        * systems that run Linux at CPL > 0 may or may not have the
-        * issue, but, even if they have the issue, there's absolutely
-        * nothing we can do about it because we can't use the real IRET
-        * instruction.
+        * Empirically, writing zero to a segment selector on AMD does
+        * not clear the base, whereas writing zero to a segment
+        * selector on Intel does clear the base.  Intel's behavior
+        * allows slightly faster context switches in the common case
+        * where GS is unused by the prev and next threads.
         *
-        * NB: For the time being, only 32-bit kernels support
-        * X86_BUG_ESPFIX as such.  64-bit kernels directly choose
-        * whether to apply espfix using paravirt hooks.  If any
-        * non-paravirt system ever shows up that does *not* have the
-        * ESPFIX issue, we can change this.
+        * Since neither vendor documents this anywhere that I can see,
+        * detect it directly instead of hardcoding the choice by
+        * vendor.
+        *
+        * I've designated AMD's behavior as the "bug" because it's
+        * counterintuitive and less friendly.
         */
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_PARAVIRT
-       do {
-               extern void native_iret(void);
-               if (pv_cpu_ops.iret == native_iret)
-                       set_cpu_bug(c, X86_BUG_ESPFIX);
-       } while (0);
-#else
-       set_cpu_bug(c, X86_BUG_ESPFIX);
-#endif
+
+       unsigned long old_base, tmp;
+       rdmsrl(MSR_FS_BASE, old_base);
+       wrmsrl(MSR_FS_BASE, 1);
+       loadsegment(fs, 0);
+       rdmsrl(MSR_FS_BASE, tmp);
+       if (tmp != 0)
+               set_cpu_bug(c, X86_BUG_NULL_SEG);
+       wrmsrl(MSR_FS_BASE, old_base);
 #endif
 }
 
@@ -925,6 +929,33 @@ static void generic_identify(struct cpuinfo_x86 *c)
        get_model_name(c); /* Default name */
 
        detect_nopl(c);
+
+       detect_null_seg_behavior(c);
+
+       /*
+        * ESPFIX is a strange bug.  All real CPUs have it.  Paravirt
+        * systems that run Linux at CPL > 0 may or may not have the
+        * issue, but, even if they have the issue, there's absolutely
+        * nothing we can do about it because we can't use the real IRET
+        * instruction.
+        *
+        * NB: For the time being, only 32-bit kernels support
+        * X86_BUG_ESPFIX as such.  64-bit kernels directly choose
+        * whether to apply espfix using paravirt hooks.  If any
+        * non-paravirt system ever shows up that does *not* have the
+        * ESPFIX issue, we can change this.
+        */
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_PARAVIRT
+       do {
+               extern void native_iret(void);
+               if (pv_cpu_ops.iret == native_iret)
+                       set_cpu_bug(c, X86_BUG_ESPFIX);
+       } while (0);
+# else
+       set_cpu_bug(c, X86_BUG_ESPFIX);
+# endif
+#endif
 }
 
 static void x86_init_cache_qos(struct cpuinfo_x86 *c)
@@ -1080,12 +1111,12 @@ void enable_sep_cpu(void)
        struct tss_struct *tss;
        int cpu;
 
+       if (!boot_cpu_has(X86_FEATURE_SEP))
+               return;
+
        cpu = get_cpu();
        tss = &per_cpu(cpu_tss, cpu);
 
-       if (!boot_cpu_has(X86_FEATURE_SEP))
-               goto out;
-
        /*
         * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
         * see the big comment in struct x86_hw_tss's definition.
@@ -1100,7 +1131,6 @@ void enable_sep_cpu(void)
 
        wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
-out:
        put_cpu();
 }
 #endif
@@ -1532,7 +1562,7 @@ void cpu_init(void)
        pr_info("Initializing CPU#%d\n", cpu);
 
        if (cpu_feature_enabled(X86_FEATURE_VME) ||
-           cpu_has_tsc ||
+           boot_cpu_has(X86_FEATURE_TSC) ||
            boot_cpu_has(X86_FEATURE_DE))
                cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
index 6adef9cac23ee99c96924e2789abeb2d1ad123f3..bd9dcd6b712d0c09937facb3cdd41da866edae81 100644 (file)
@@ -333,7 +333,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
                switch (dir0_lsn) {
                case 0xd:  /* either a 486SLC or DLC w/o DEVID */
                        dir0_msn = 0;
-                       p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
+                       p = Cx486_name[!!boot_cpu_has(X86_FEATURE_FPU)];
                        break;
 
                case 0xe:  /* a 486S A step */
index e4393bfc7f0d9bb39520adceeae2968652779a5c..b18f4706e607ab9f6f570223187cec6205ee78de 100644 (file)
@@ -152,9 +152,9 @@ static void early_init_intel(struct cpuinfo_x86 *c)
         *  the TLB when any changes are made to any of the page table entries.
         *  The operating system must reload CR3 to cause the TLB to be flushed"
         *
-        * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should
-        * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
-        * to be modified
+        * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
+        * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
+        * to be modified.
         */
        if (c->x86 == 5 && c->x86_model == 9) {
                pr_info("Disabling PGE capability bit\n");
@@ -281,7 +281,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
         * integrated APIC (see 11AP erratum in "Pentium Processor
         * Specification Update").
         */
-       if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
+       if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
            (c->x86_mask < 0x6 || c->x86_mask == 0xb))
                set_cpu_bug(c, X86_BUG_11AP);
 
@@ -456,7 +456,7 @@ static void init_intel(struct cpuinfo_x86 *c)
                        set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
        }
 
-       if (cpu_has_xmm2)
+       if (cpu_has(c, X86_FEATURE_XMM2))
                set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 
        if (boot_cpu_has(X86_FEATURE_DS)) {
@@ -468,7 +468,7 @@ static void init_intel(struct cpuinfo_x86 *c)
                        set_cpu_cap(c, X86_FEATURE_PEBS);
        }
 
-       if (c->x86 == 6 && cpu_has_clflush &&
+       if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
            (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
                set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
 
index 1e8bb6c94f14c0e639339a518f2c8949cf7143ca..1defb8ea882c09033461728596f5e65aa73fba51 100644 (file)
@@ -84,7 +84,7 @@ static int cmci_supported(int *banks)
         */
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return 0;
-       if (!cpu_has_apic || lapic_get_maxlvt() < 6)
+       if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
                return 0;
        rdmsrl(MSR_IA32_MCG_CAP, cap);
        *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
index ac780cad3b8601db3bcc2e174f69dc9ca3f00473..6b9dc4d18cccd1cd8fa6cf070b618bc878ad5a05 100644 (file)
@@ -450,7 +450,7 @@ asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
 /* Thermal monitoring depends on APIC, ACPI and clock modulation */
 static int intel_thermal_supported(struct cpuinfo_x86 *c)
 {
-       if (!cpu_has_apic)
+       if (!boot_cpu_has(X86_FEATURE_APIC))
                return 0;
        if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
                return 0;
index f8c81ba0b4651c02cd31f9a1117e64192e64e1d0..b1086f79e57e44858105feb91d7f015caeaec18f 100644 (file)
@@ -137,7 +137,7 @@ static void prepare_set(void)
        u32 cr0;
 
        /*  Save value of CR4 and clear Page Global Enable (bit 7)  */
-       if (cpu_has_pge) {
+       if (boot_cpu_has(X86_FEATURE_PGE)) {
                cr4 = __read_cr4();
                __write_cr4(cr4 & ~X86_CR4_PGE);
        }
@@ -170,7 +170,7 @@ static void post_set(void)
        write_cr0(read_cr0() & ~X86_CR0_CD);
 
        /* Restore value of CR4 */
-       if (cpu_has_pge)
+       if (boot_cpu_has(X86_FEATURE_PGE))
                __write_cr4(cr4);
 }
 
index 19f57360dfd2583b82743c8cb69ffaef7175139b..16e37a2581acd51dc00249c17918765bd2c97bd6 100644 (file)
@@ -444,11 +444,24 @@ static void __init print_mtrr_state(void)
                pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
 }
 
+/* PAT setup for BP. We need to go through sync steps here */
+void __init mtrr_bp_pat_init(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       prepare_set();
+
+       pat_init();
+
+       post_set();
+       local_irq_restore(flags);
+}
+
 /* Grab all of the MTRR state for this CPU into *state */
 bool __init get_mtrr_state(void)
 {
        struct mtrr_var_range *vrs;
-       unsigned long flags;
        unsigned lo, dummy;
        unsigned int i;
 
@@ -481,15 +494,6 @@ bool __init get_mtrr_state(void)
 
        mtrr_state_set = 1;
 
-       /* PAT setup for BP. We need to go through sync steps here */
-       local_irq_save(flags);
-       prepare_set();
-
-       pat_init();
-
-       post_set();
-       local_irq_restore(flags);
-
        return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
 }
 
@@ -741,7 +745,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
        wbinvd();
 
        /* Save value of CR4 and clear Page Global Enable (bit 7) */
-       if (cpu_has_pge) {
+       if (boot_cpu_has(X86_FEATURE_PGE)) {
                cr4 = __read_cr4();
                __write_cr4(cr4 & ~X86_CR4_PGE);
        }
@@ -771,7 +775,7 @@ static void post_set(void) __releases(set_atomicity_lock)
        write_cr0(read_cr0() & ~X86_CR0_CD);
 
        /* Restore value of CR4 */
-       if (cpu_has_pge)
+       if (boot_cpu_has(X86_FEATURE_PGE))
                __write_cr4(cr4);
        raw_spin_unlock(&set_atomicity_lock);
 }
index 10f8d4796240709cea61c0e56522d75eeff8ffdd..7d393ecdeee692187b726c003532e1124faa02d4 100644 (file)
@@ -752,6 +752,9 @@ void __init mtrr_bp_init(void)
                        /* BIOS may override */
                        __mtrr_enabled = get_mtrr_state();
 
+                       if (mtrr_enabled())
+                               mtrr_bp_pat_init();
+
                        if (mtrr_cleanup(phys_addr)) {
                                changed_by_mtrr_cleanup = 1;
                                mtrr_if->set_all();
@@ -759,8 +762,16 @@ void __init mtrr_bp_init(void)
                }
        }
 
-       if (!mtrr_enabled())
+       if (!mtrr_enabled()) {
                pr_info("MTRR: Disabled\n");
+
+               /*
+                * PAT initialization relies on MTRR's rendezvous handler.
+                * Skip PAT init until the handler can initialize both
+                * features independently.
+                */
+               pat_disable("MTRRs disabled, skipping PAT initialization too.");
+       }
 }
 
 void mtrr_ap_init(void)
index 951884dcc43354573c2bd234aed3fd3adb067a84..6c7ced07d16d1181c6ef21f4f2252ef63019a77b 100644 (file)
@@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
 void fill_mtrr_var_range(unsigned int index,
                u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 bool get_mtrr_state(void);
+void mtrr_bp_pat_init(void);
 
 extern void set_mtrr_ops(const struct mtrr_ops *ops);
 
index 364e5834689753fc7da34c6dc8a34cca22ab92db..8cac429b6a1d53255899e84459ab9436d3df4100 100644 (file)
@@ -94,7 +94,7 @@ static void __init vmware_platform_setup(void)
  */
 static uint32_t __init vmware_platform(void)
 {
-       if (cpu_has_hypervisor) {
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
                unsigned int eax;
                unsigned int hyper_vendor_id[3];
 
index 1f4acd68b98bccb7bf4032bc6ff2bde3f4efecdb..3fe45f84ced4463b147bcb907feedca94c07cab0 100644 (file)
@@ -151,7 +151,7 @@ static void __init dtb_lapic_setup(void)
                return;
 
        /* Did the boot loader setup the local APIC ? */
-       if (!cpu_has_apic) {
+       if (!boot_cpu_has(X86_FEATURE_APIC)) {
                if (apic_force_enable(r.start))
                        return;
        }
index dd9ca9b60ff3a497b5033c3fd714f93140ac689f..aad34aafc0e08ae6ed20d018874eb6b07073922a 100644 (file)
@@ -21,11 +21,15 @@ static double __initdata y = 3145727.0;
  * We should really only care about bugs here
  * anyway. Not features.
  */
-static void __init check_fpu(void)
+void __init fpu__init_check_bugs(void)
 {
        u32 cr0_saved;
        s32 fdiv_bug;
 
+       /* kernel_fpu_begin/end() relies on patched alternative instructions. */
+       if (!boot_cpu_has(X86_FEATURE_FPU))
+               return;
+
        /* We might have CR0::TS set already, clear it: */
        cr0_saved = read_cr0();
        write_cr0(cr0_saved & ~X86_CR0_TS);
@@ -59,13 +63,3 @@ static void __init check_fpu(void)
                pr_warn("Hmm, FPU with FDIV bug\n");
        }
 }
-
-void __init fpu__init_check_bugs(void)
-{
-       /*
-        * kernel_fpu_begin/end() in check_fpu() relies on the patched
-        * alternative instructions.
-        */
-       if (cpu_has_fpu)
-               check_fpu();
-}
index 8e37cc8a539adc1c9d348d9a25b13815d4166aa6..97027545a72dcd4c34964aff481ac1b7a94c0df7 100644 (file)
@@ -217,14 +217,14 @@ static inline void fpstate_init_fstate(struct fregs_state *fp)
 
 void fpstate_init(union fpregs_state *state)
 {
-       if (!cpu_has_fpu) {
+       if (!static_cpu_has(X86_FEATURE_FPU)) {
                fpstate_init_soft(&state->soft);
                return;
        }
 
        memset(state, 0, xstate_size);
 
-       if (cpu_has_fxsr)
+       if (static_cpu_has(X86_FEATURE_FXSR))
                fpstate_init_fxstate(&state->fxsave);
        else
                fpstate_init_fstate(&state->fsave);
@@ -237,7 +237,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
        dst_fpu->fpregs_active = 0;
        dst_fpu->last_cpu = -1;
 
-       if (!src_fpu->fpstate_active || !cpu_has_fpu)
+       if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
                return 0;
 
        WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -506,33 +506,6 @@ void fpu__clear(struct fpu *fpu)
  * x87 math exception handling:
  */
 
-static inline unsigned short get_fpu_cwd(struct fpu *fpu)
-{
-       if (cpu_has_fxsr) {
-               return fpu->state.fxsave.cwd;
-       } else {
-               return (unsigned short)fpu->state.fsave.cwd;
-       }
-}
-
-static inline unsigned short get_fpu_swd(struct fpu *fpu)
-{
-       if (cpu_has_fxsr) {
-               return fpu->state.fxsave.swd;
-       } else {
-               return (unsigned short)fpu->state.fsave.swd;
-       }
-}
-
-static inline unsigned short get_fpu_mxcsr(struct fpu *fpu)
-{
-       if (cpu_has_xmm) {
-               return fpu->state.fxsave.mxcsr;
-       } else {
-               return MXCSR_DEFAULT;
-       }
-}
-
 int fpu__exception_code(struct fpu *fpu, int trap_nr)
 {
        int err;
@@ -547,10 +520,15 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
                 * so if this combination doesn't produce any single exception,
                 * then we have a bad program that isn't synchronizing its FPU usage
                 * and it will suffer the consequences since we won't be able to
-                * fully reproduce the context of the exception
+                * fully reproduce the context of the exception.
                 */
-               cwd = get_fpu_cwd(fpu);
-               swd = get_fpu_swd(fpu);
+               if (boot_cpu_has(X86_FEATURE_FXSR)) {
+                       cwd = fpu->state.fxsave.cwd;
+                       swd = fpu->state.fxsave.swd;
+               } else {
+                       cwd = (unsigned short)fpu->state.fsave.cwd;
+                       swd = (unsigned short)fpu->state.fsave.swd;
+               }
 
                err = swd & ~cwd;
        } else {
@@ -560,7 +538,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
                 * unmasked exception was caught we must mask the exception mask bits
                 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
                 */
-               unsigned short mxcsr = get_fpu_mxcsr(fpu);
+               unsigned short mxcsr = MXCSR_DEFAULT;
+
+               if (boot_cpu_has(X86_FEATURE_XMM))
+                       mxcsr = fpu->state.fxsave.mxcsr;
+
                err = ~(mxcsr >> 7) & mxcsr;
        }
 
index 54c86fffbf9f85281a77977a4aa8dbb67f112464..aacfd7a82cec57b9f2eb2f57e17d277a9cd74141 100644 (file)
@@ -29,22 +29,22 @@ static void fpu__init_cpu_generic(void)
        unsigned long cr0;
        unsigned long cr4_mask = 0;
 
-       if (cpu_has_fxsr)
+       if (boot_cpu_has(X86_FEATURE_FXSR))
                cr4_mask |= X86_CR4_OSFXSR;
-       if (cpu_has_xmm)
+       if (boot_cpu_has(X86_FEATURE_XMM))
                cr4_mask |= X86_CR4_OSXMMEXCPT;
        if (cr4_mask)
                cr4_set_bits(cr4_mask);
 
        cr0 = read_cr0();
        cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
-       if (!cpu_has_fpu)
+       if (!boot_cpu_has(X86_FEATURE_FPU))
                cr0 |= X86_CR0_EM;
        write_cr0(cr0);
 
        /* Flush out any pending x87 state: */
 #ifdef CONFIG_MATH_EMULATION
-       if (!cpu_has_fpu)
+       if (!boot_cpu_has(X86_FEATURE_FPU))
                fpstate_init_soft(&current->thread.fpu.state.soft);
        else
 #endif
@@ -89,7 +89,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
        }
 
 #ifndef CONFIG_MATH_EMULATION
-       if (!cpu_has_fpu) {
+       if (!boot_cpu_has(X86_FEATURE_FPU)) {
                pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
                for (;;)
                        asm volatile("hlt");
@@ -106,7 +106,7 @@ static void __init fpu__init_system_mxcsr(void)
 {
        unsigned int mask = 0;
 
-       if (cpu_has_fxsr) {
+       if (boot_cpu_has(X86_FEATURE_FXSR)) {
                /* Static because GCC does not get 16-byte stack alignment right: */
                static struct fxregs_state fxregs __initdata;
 
@@ -212,7 +212,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
         * fpu__init_system_xstate().
         */
 
-       if (!cpu_has_fpu) {
+       if (!boot_cpu_has(X86_FEATURE_FPU)) {
                /*
                 * Disable xsave as we do not support it if i387
                 * emulation is enabled.
@@ -221,7 +221,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
                setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
                xstate_size = sizeof(struct swregs_state);
        } else {
-               if (cpu_has_fxsr)
+               if (boot_cpu_has(X86_FEATURE_FXSR))
                        xstate_size = sizeof(struct fxregs_state);
                else
                        xstate_size = sizeof(struct fregs_state);
index 8bd1c003942aa801ed026ec770bdf00bc4cae946..81422dfb152b7c8e012300637b1acfd40384f697 100644 (file)
@@ -21,7 +21,10 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r
 {
        struct fpu *target_fpu = &target->thread.fpu;
 
-       return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0;
+       if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+               return regset->n;
+       else
+               return 0;
 }
 
 int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
@@ -30,7 +33,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 {
        struct fpu *fpu = &target->thread.fpu;
 
-       if (!cpu_has_fxsr)
+       if (!boot_cpu_has(X86_FEATURE_FXSR))
                return -ENODEV;
 
        fpu__activate_fpstate_read(fpu);
@@ -47,7 +50,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
        struct fpu *fpu = &target->thread.fpu;
        int ret;
 
-       if (!cpu_has_fxsr)
+       if (!boot_cpu_has(X86_FEATURE_FXSR))
                return -ENODEV;
 
        fpu__activate_fpstate_write(fpu);
@@ -65,7 +68,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
         * update the header bits in the xsave header, indicating the
         * presence of FP and SSE state.
         */
-       if (cpu_has_xsave)
+       if (boot_cpu_has(X86_FEATURE_XSAVE))
                fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
 
        return ret;
@@ -79,7 +82,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
        struct xregs_state *xsave;
        int ret;
 
-       if (!cpu_has_xsave)
+       if (!boot_cpu_has(X86_FEATURE_XSAVE))
                return -ENODEV;
 
        fpu__activate_fpstate_read(fpu);
@@ -108,7 +111,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
        struct xregs_state *xsave;
        int ret;
 
-       if (!cpu_has_xsave)
+       if (!boot_cpu_has(X86_FEATURE_XSAVE))
                return -ENODEV;
 
        fpu__activate_fpstate_write(fpu);
@@ -275,10 +278,10 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 
        fpu__activate_fpstate_read(fpu);
 
-       if (!static_cpu_has(X86_FEATURE_FPU))
+       if (!boot_cpu_has(X86_FEATURE_FPU))
                return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
 
-       if (!cpu_has_fxsr)
+       if (!boot_cpu_has(X86_FEATURE_FXSR))
                return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
                                           &fpu->state.fsave, 0,
                                           -1);
@@ -306,10 +309,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
        fpu__activate_fpstate_write(fpu);
        fpstate_sanitize_xstate(fpu);
 
-       if (!static_cpu_has(X86_FEATURE_FPU))
+       if (!boot_cpu_has(X86_FEATURE_FPU))
                return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
 
-       if (!cpu_has_fxsr)
+       if (!boot_cpu_has(X86_FEATURE_FXSR))
                return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                          &fpu->state.fsave, 0,
                                          -1);
@@ -325,7 +328,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
         * update the header bit in the xsave header, indicating the
         * presence of FP.
         */
-       if (cpu_has_xsave)
+       if (boot_cpu_has(X86_FEATURE_XSAVE))
                fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
        return ret;
 }
index b48ef35b28d4fbcab4b5f00d613b57e63a00b1d0..4ea2a59483c7b1b07c60178daa55b084337dcf07 100644 (file)
@@ -190,7 +190,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
  */
 void fpu__init_cpu_xstate(void)
 {
-       if (!cpu_has_xsave || !xfeatures_mask)
+       if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
                return;
 
        cr4_set_bits(X86_CR4_OSXSAVE);
@@ -280,7 +280,7 @@ static void __init setup_xstate_comp(void)
        xstate_comp_offsets[0] = 0;
        xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
 
-       if (!cpu_has_xsaves) {
+       if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
                for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
                        if (xfeature_enabled(i)) {
                                xstate_comp_offsets[i] = xstate_offsets[i];
@@ -316,13 +316,13 @@ static void __init setup_init_fpu_buf(void)
        WARN_ON_FPU(!on_boot_cpu);
        on_boot_cpu = 0;
 
-       if (!cpu_has_xsave)
+       if (!boot_cpu_has(X86_FEATURE_XSAVE))
                return;
 
        setup_xstate_features();
        print_xstate_features();
 
-       if (cpu_has_xsaves) {
+       if (boot_cpu_has(X86_FEATURE_XSAVES)) {
                init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
                init_fpstate.xsave.header.xfeatures = xfeatures_mask;
        }
@@ -417,7 +417,7 @@ static int xfeature_size(int xfeature_nr)
  */
 static int using_compacted_format(void)
 {
-       return cpu_has_xsaves;
+       return boot_cpu_has(X86_FEATURE_XSAVES);
 }
 
 static void __xstate_dump_leaves(void)
@@ -549,7 +549,7 @@ static unsigned int __init calculate_xstate_size(void)
        unsigned int eax, ebx, ecx, edx;
        unsigned int calculated_xstate_size;
 
-       if (!cpu_has_xsaves) {
+       if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
                /*
                 * - CPUID function 0DH, sub-function 0:
                 *    EBX enumerates the size (in bytes) required by
@@ -630,7 +630,7 @@ void __init fpu__init_system_xstate(void)
        WARN_ON_FPU(!on_boot_cpu);
        on_boot_cpu = 0;
 
-       if (!cpu_has_xsave) {
+       if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
                pr_info("x86/fpu: Legacy x87 FPU detected.\n");
                return;
        }
@@ -667,7 +667,7 @@ void __init fpu__init_system_xstate(void)
        pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
                xfeatures_mask,
                xstate_size,
-               cpu_has_xsaves ? "compacted" : "standard");
+               boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
 }
 
 /*
@@ -678,7 +678,7 @@ void fpu__resume_cpu(void)
        /*
         * Restore XCR0 on xsave capable CPUs:
         */
-       if (cpu_has_xsave)
+       if (boot_cpu_has(X86_FEATURE_XSAVE))
                xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
 }
 
index af1112980dd411334ef59d7d0a7b818946f2137d..6f8902b0d1514bd9f5b2b3ea86f55f96fa6d8618 100644 (file)
@@ -555,62 +555,53 @@ early_idt_handler_common:
         */
        cld
 
-       cmpl $2,(%esp)          # X86_TRAP_NMI
-       je .Lis_nmi             # Ignore NMI
-
-       cmpl $2,%ss:early_recursion_flag
-       je hlt_loop
        incl %ss:early_recursion_flag
 
-       push %eax               # 16(%esp)
-       push %ecx               # 12(%esp)
-       push %edx               #  8(%esp)
-       push %ds                #  4(%esp)
-       push %es                #  0(%esp)
-       movl $(__KERNEL_DS),%eax
-       movl %eax,%ds
-       movl %eax,%es
-
-       cmpl $(__KERNEL_CS),32(%esp)
-       jne 10f
+       /* The vector number is in pt_regs->gs */
 
-       leal 28(%esp),%eax      # Pointer to %eip
-       call early_fixup_exception
-       andl %eax,%eax
-       jnz ex_entry            /* found an exception entry */
-
-10:
-#ifdef CONFIG_PRINTK
-       xorl %eax,%eax
-       movw %ax,2(%esp)        /* clean up the segment values on some cpus */
-       movw %ax,6(%esp)
-       movw %ax,34(%esp)
-       leal  40(%esp),%eax
-       pushl %eax              /* %esp before the exception */
-       pushl %ebx
-       pushl %ebp
-       pushl %esi
-       pushl %edi
-       movl %cr2,%eax
-       pushl %eax
-       pushl (20+6*4)(%esp)    /* trapno */
-       pushl $fault_msg
-       call printk
-#endif
-       call dump_stack
-hlt_loop:
-       hlt
-       jmp hlt_loop
-
-ex_entry:
-       pop %es
-       pop %ds
-       pop %edx
-       pop %ecx
-       pop %eax
-       decl %ss:early_recursion_flag
-.Lis_nmi:
-       addl $8,%esp            /* drop vector number and error code */
+       cld
+       pushl   %fs             /* pt_regs->fs */
+       movw    $0, 2(%esp)     /* clear high bits (some CPUs leave garbage) */
+       pushl   %es             /* pt_regs->es */
+       movw    $0, 2(%esp)     /* clear high bits (some CPUs leave garbage) */
+       pushl   %ds             /* pt_regs->ds */
+       movw    $0, 2(%esp)     /* clear high bits (some CPUs leave garbage) */
+       pushl   %eax            /* pt_regs->ax */
+       pushl   %ebp            /* pt_regs->bp */
+       pushl   %edi            /* pt_regs->di */
+       pushl   %esi            /* pt_regs->si */
+       pushl   %edx            /* pt_regs->dx */
+       pushl   %ecx            /* pt_regs->cx */
+       pushl   %ebx            /* pt_regs->bx */
+
+       /* Fix up DS and ES */
+       movl    $(__KERNEL_DS), %ecx
+       movl    %ecx, %ds
+       movl    %ecx, %es
+
+       /* Load the vector number into EDX */
+       movl    PT_GS(%esp), %edx
+
+       /* Load GS into pt_regs->gs and clear high bits */
+       movw    %gs, PT_GS(%esp)
+       movw    $0, PT_GS+2(%esp)
+
+       movl    %esp, %eax      /* args are pt_regs (EAX), trapnr (EDX) */
+       call    early_fixup_exception
+
+       popl    %ebx            /* pt_regs->bx */
+       popl    %ecx            /* pt_regs->cx */
+       popl    %edx            /* pt_regs->dx */
+       popl    %esi            /* pt_regs->si */
+       popl    %edi            /* pt_regs->di */
+       popl    %ebp            /* pt_regs->bp */
+       popl    %eax            /* pt_regs->ax */
+       popl    %ds             /* pt_regs->ds */
+       popl    %es             /* pt_regs->es */
+       popl    %fs             /* pt_regs->fs */
+       popl    %gs             /* pt_regs->gs */
+       decl    %ss:early_recursion_flag
+       addl    $4, %esp        /* pop pt_regs->orig_ax */
        iret
 ENDPROC(early_idt_handler_common)
 
@@ -647,10 +638,14 @@ ignore_int:
        popl %eax
 #endif
        iret
+
+hlt_loop:
+       hlt
+       jmp hlt_loop
 ENDPROC(ignore_int)
 __INITDATA
        .align 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
        .long 0
 
 __REFDATA
@@ -715,19 +710,6 @@ __INITRODATA
 int_msg:
        .asciz "Unknown interrupt or fault at: %p %p %p\n"
 
-fault_msg:
-/* fault info: */
-       .ascii "BUG: Int %d: CR2 %p\n"
-/* regs pushed in early_idt_handler: */
-       .ascii "     EDI %p  ESI %p  EBP %p  EBX %p\n"
-       .ascii "     ESP %p   ES %p   DS %p\n"
-       .ascii "     EDX %p  ECX %p  EAX %p\n"
-/* fault frame: */
-       .ascii "     vec %p  err %p  EIP %p   CS %p  flg %p\n"
-       .ascii "Stack: %p %p %p %p %p %p %p %p\n"
-       .ascii "       %p %p %p %p %p %p %p %p\n"
-       .asciz "       %p %p %p %p %p %p %p %p\n"
-
 #include "../../x86/xen/xen-head.S"
 
 /*
index 22fbf9df61bb4eecbb5ffe530562b56c1def90b8..5df831ef1442f36c5ee0ac00447a2505216d1a26 100644 (file)
@@ -20,6 +20,7 @@
 #include <asm/processor-flags.h>
 #include <asm/percpu.h>
 #include <asm/nops.h>
+#include "../entry/calling.h"
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/asm-offsets.h>
@@ -64,6 +65,14 @@ startup_64:
         * tables and then reload them.
         */
 
+       /*
+        * Setup stack for verify_cpu(). "-8" because stack_start is defined
+        * this way, see below. Our best guess is a NULL ptr for stack
+        * termination heuristics and we don't want to break anything which
+        * might depend on it (kgdb, ...).
+        */
+       leaq    (__end_init_task - 8)(%rip), %rsp
+
        /* Sanitize CPU configuration */
        call verify_cpu
 
@@ -350,90 +359,48 @@ early_idt_handler_common:
         */
        cld
 
-       cmpl $2,(%rsp)          # X86_TRAP_NMI
-       je .Lis_nmi             # Ignore NMI
-
-       cmpl $2,early_recursion_flag(%rip)
-       jz  1f
        incl early_recursion_flag(%rip)
 
-       pushq %rax              # 64(%rsp)
-       pushq %rcx              # 56(%rsp)
-       pushq %rdx              # 48(%rsp)
-       pushq %rsi              # 40(%rsp)
-       pushq %rdi              # 32(%rsp)
-       pushq %r8               # 24(%rsp)
-       pushq %r9               # 16(%rsp)
-       pushq %r10              #  8(%rsp)
-       pushq %r11              #  0(%rsp)
-
-       cmpl $__KERNEL_CS,96(%rsp)
-       jne 11f
-
-       cmpl $14,72(%rsp)       # Page fault?
+       /* The vector number is currently in the pt_regs->di slot. */
+       pushq %rsi                              /* pt_regs->si */
+       movq 8(%rsp), %rsi                      /* RSI = vector number */
+       movq %rdi, 8(%rsp)                      /* pt_regs->di = RDI */
+       pushq %rdx                              /* pt_regs->dx */
+       pushq %rcx                              /* pt_regs->cx */
+       pushq %rax                              /* pt_regs->ax */
+       pushq %r8                               /* pt_regs->r8 */
+       pushq %r9                               /* pt_regs->r9 */
+       pushq %r10                              /* pt_regs->r10 */
+       pushq %r11                              /* pt_regs->r11 */
+       pushq %rbx                              /* pt_regs->bx */
+       pushq %rbp                              /* pt_regs->bp */
+       pushq %r12                              /* pt_regs->r12 */
+       pushq %r13                              /* pt_regs->r13 */
+       pushq %r14                              /* pt_regs->r14 */
+       pushq %r15                              /* pt_regs->r15 */
+
+       cmpq $14,%rsi           /* Page fault? */
        jnz 10f
-       GET_CR2_INTO(%rdi)      # can clobber any volatile register if pv
+       GET_CR2_INTO(%rdi)      /* Can clobber any volatile register if pv */
        call early_make_pgtable
        andl %eax,%eax
-       jz 20f                  # All good
+       jz 20f                  /* All good */
 
 10:
-       leaq 88(%rsp),%rdi      # Pointer to %rip
+       movq %rsp,%rdi          /* RDI = pt_regs; RSI is already trapnr */
        call early_fixup_exception
-       andl %eax,%eax
-       jnz 20f                 # Found an exception entry
-
-11:
-#ifdef CONFIG_EARLY_PRINTK
-       GET_CR2_INTO(%r9)       # can clobber any volatile register if pv
-       movl 80(%rsp),%r8d      # error code
-       movl 72(%rsp),%esi      # vector number
-       movl 96(%rsp),%edx      # %cs
-       movq 88(%rsp),%rcx      # %rip
-       xorl %eax,%eax
-       leaq early_idt_msg(%rip),%rdi
-       call early_printk
-       cmpl $2,early_recursion_flag(%rip)
-       jz  1f
-       call dump_stack
-#ifdef CONFIG_KALLSYMS 
-       leaq early_idt_ripmsg(%rip),%rdi
-       movq 40(%rsp),%rsi      # %rip again
-       call __print_symbol
-#endif
-#endif /* EARLY_PRINTK */
-1:     hlt
-       jmp 1b
-
-20:    # Exception table entry found or page table generated
-       popq %r11
-       popq %r10
-       popq %r9
-       popq %r8
-       popq %rdi
-       popq %rsi
-       popq %rdx
-       popq %rcx
-       popq %rax
+
+20:
        decl early_recursion_flag(%rip)
-.Lis_nmi:
-       addq $16,%rsp           # drop vector number and error code
-       INTERRUPT_RETURN
+       jmp restore_regs_and_iret
 ENDPROC(early_idt_handler_common)
 
        __INITDATA
 
        .balign 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
        .long 0
 
-#ifdef CONFIG_EARLY_PRINTK
-early_idt_msg:
-       .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
-early_idt_ripmsg:
-       .asciz "RIP %s\n"
-#endif /* CONFIG_EARLY_PRINTK */
-
 #define NEXT_PAGE(name) \
        .balign PAGE_SIZE; \
 GLOBAL(name)
index a1f0e4a5c47e3239824ac9c61774127e378d4f03..7282c2e3858ec3fbf236d4790ae917ca05815ed2 100644 (file)
@@ -773,7 +773,6 @@ static struct clocksource clocksource_hpet = {
        .mask           = HPET_MASK,
        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
        .resume         = hpet_resume_counter,
-       .archdata       = { .vclock_mode = VCLOCK_HPET },
 };
 
 static int hpet_clocksource_register(void)
index e565e0e4d21698c40297d01991a74ff659fca5c3..fc25f698d792faed00f461b0a378f30c35e3eb3d 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/cpu.h>
 #include <asm/kprobes.h>
 #include <asm/alternative.h>
+#include <asm/text-patching.h>
 
 #ifdef HAVE_JUMP_LABEL
 
index 2da6ee9ae69b725a1d960c0467005c1e581d96c4..04cde527d72849be75ccb65e0d7ed650a1ef3a82 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/uaccess.h>
 #include <linux/memory.h>
 
+#include <asm/text-patching.h>
 #include <asm/debugreg.h>
 #include <asm/apicdef.h>
 #include <asm/apic.h>
index ae703acb85c185e8c6f3735da83f9998b2f8e7a1..38cf7a7412503f513bb1aae1c1acc5b76e7d2e1d 100644 (file)
@@ -51,6 +51,7 @@
 #include <linux/ftrace.h>
 #include <linux/frame.h>
 
+#include <asm/text-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
index 7b3b9d15c47a63953d6932026cc57db795e3a507..4425f593f0ec8c6f9d0842054e5986ed28f99fc2 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
 
+#include <asm/text-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
index 807950860fb7028e28fe1e98d8a2cddeccfa8063..dc1207e2f19390a7f3dbe5efa967ac008f19da58 100644 (file)
@@ -522,7 +522,7 @@ static noinline uint32_t __kvm_cpuid_base(void)
        if (boot_cpu_data.cpuid_level < 0)
                return 0;       /* So we don't blow up on old processors */
 
-       if (cpu_has_hypervisor)
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
                return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
 
        return 0;
index 005c03e93fc54c7907e8e9e2bc1d771902b1c3ca..477ae806c2fa71f425ff56a8b75b1306dfcf7535 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/jump_label.h>
 #include <linux/random.h>
 
+#include <asm/text-patching.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
index f08ac28b8136da85e0866bf689b3b16faec1c157..f9583917c7c4f440456074efff1179e54b38e558 100644 (file)
@@ -339,8 +339,10 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
        .write_cr8 = native_write_cr8,
 #endif
        .wbinvd = native_wbinvd,
-       .read_msr = native_read_msr_safe,
-       .write_msr = native_write_msr_safe,
+       .read_msr = native_read_msr,
+       .write_msr = native_write_msr,
+       .read_msr_safe = native_read_msr_safe,
+       .write_msr_safe = native_write_msr_safe,
        .read_pmc = native_read_pmc,
        .load_tr_desc = native_load_tr_desc,
        .set_ldt = native_set_ldt,
index 6cbab31ac23a20fb3980f06f88becddff135411b..6b16c36f0939313dde91d03428cdc855ff3dba9e 100644 (file)
@@ -136,25 +136,6 @@ void release_thread(struct task_struct *dead_task)
        }
 }
 
-static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
-{
-       struct user_desc ud = {
-               .base_addr = addr,
-               .limit = 0xfffff,
-               .seg_32bit = 1,
-               .limit_in_pages = 1,
-               .useable = 1,
-       };
-       struct desc_struct *desc = t->thread.tls_array;
-       desc += tls;
-       fill_ldt(desc, &ud);
-}
-
-static inline u32 read_32bit_tls(struct task_struct *t, int tls)
-{
-       return get_desc_base(&t->thread.tls_array[tls]);
-}
-
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
                unsigned long arg, struct task_struct *p, unsigned long tls)
 {
@@ -169,9 +150,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
        p->thread.io_bitmap_ptr = NULL;
 
        savesegment(gs, p->thread.gsindex);
-       p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
+       p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
        savesegment(fs, p->thread.fsindex);
-       p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
+       p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
        savesegment(es, p->thread.es);
        savesegment(ds, p->thread.ds);
        memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -210,7 +191,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
         */
        if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
-               if (is_ia32_task())
+               if (in_ia32_syscall())
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)tls, 0);
                else
@@ -282,7 +263,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct fpu *next_fpu = &next->fpu;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
-       unsigned fsindex, gsindex;
+       unsigned prev_fsindex, prev_gsindex;
        fpu_switch_t fpu_switch;
 
        fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
@@ -292,8 +273,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         *
         * (e.g. xen_load_tls())
         */
-       savesegment(fs, fsindex);
-       savesegment(gs, gsindex);
+       savesegment(fs, prev_fsindex);
+       savesegment(gs, prev_gsindex);
 
        /*
         * Load TLS before restoring any segments so that segment loads
@@ -336,66 +317,104 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         * Switch FS and GS.
         *
         * These are even more complicated than DS and ES: they have
-        * 64-bit bases are that controlled by arch_prctl.  Those bases
-        * only differ from the values in the GDT or LDT if the selector
-        * is 0.
-        *
-        * Loading the segment register resets the hidden base part of
-        * the register to 0 or the value from the GDT / LDT.  If the
-        * next base address zero, writing 0 to the segment register is
-        * much faster than using wrmsr to explicitly zero the base.
-        *
-        * The thread_struct.fs and thread_struct.gs values are 0
-        * if the fs and gs bases respectively are not overridden
-        * from the values implied by fsindex and gsindex.  They
-        * are nonzero, and store the nonzero base addresses, if
-        * the bases are overridden.
-        *
-        * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
-        * be impossible.
-        *
-        * Therefore we need to reload the segment registers if either
-        * the old or new selector is nonzero, and we need to override
-        * the base address if next thread expects it to be overridden.
+        * 64-bit bases that are controlled by arch_prctl.  The bases
+        * don't necessarily match the selectors, as user code can do
+        * any number of things to cause them to be inconsistent.
         *
-        * This code is unnecessarily slow in the case where the old and
-        * new indexes are zero and the new base is nonzero -- it will
-        * unnecessarily write 0 to the selector before writing the new
-        * base address.
+        * We don't promise to preserve the bases if the selectors are
+        * nonzero.  We also don't promise to preserve the base if the
+        * selector is zero and the base doesn't match whatever was
+        * most recently passed to ARCH_SET_FS/GS.  (If/when the
+        * FSGSBASE instructions are enabled, we'll need to offer
+        * stronger guarantees.)
         *
-        * Note: This all depends on arch_prctl being the only way that
-        * user code can override the segment base.  Once wrfsbase and
-        * wrgsbase are enabled, most of this code will need to change.
+        * As an invariant,
+        * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
+        * impossible.
         */
-       if (unlikely(fsindex | next->fsindex | prev->fs)) {
+       if (next->fsindex) {
+               /* Loading a nonzero value into FS sets the index and base. */
                loadsegment(fs, next->fsindex);
-
-               /*
-                * If user code wrote a nonzero value to FS, then it also
-                * cleared the overridden base address.
-                *
-                * XXX: if user code wrote 0 to FS and cleared the base
-                * address itself, we won't notice and we'll incorrectly
-                * restore the prior base address next time we reschdule
-                * the process.
-                */
-               if (fsindex)
-                       prev->fs = 0;
+       } else {
+               if (next->fsbase) {
+                       /* Next index is zero but next base is nonzero. */
+                       if (prev_fsindex)
+                               loadsegment(fs, 0);
+                       wrmsrl(MSR_FS_BASE, next->fsbase);
+               } else {
+                       /* Next base and index are both zero. */
+                       if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+                               /*
+                                * We don't know the previous base and can't
+                                * find out without RDMSR.  Forcibly clear it.
+                                */
+                               loadsegment(fs, __USER_DS);
+                               loadsegment(fs, 0);
+                       } else {
+                               /*
+                                * If the previous index is zero and ARCH_SET_FS
+                                * didn't change the base, then the base is
+                                * also zero and we don't need to do anything.
+                                */
+                               if (prev->fsbase || prev_fsindex)
+                                       loadsegment(fs, 0);
+                       }
+               }
        }
-       if (next->fs)
-               wrmsrl(MSR_FS_BASE, next->fs);
-       prev->fsindex = fsindex;
+       /*
+        * Save the old state and preserve the invariant.
+        * NB: if prev_fsindex == 0, then we can't reliably learn the base
+        * without RDMSR because Intel user code can zero it without telling
+        * us and AMD user code can program any 32-bit value without telling
+        * us.
+        */
+       if (prev_fsindex)
+               prev->fsbase = 0;
+       prev->fsindex = prev_fsindex;
 
-       if (unlikely(gsindex | next->gsindex | prev->gs)) {
+       if (next->gsindex) {
+               /* Loading a nonzero value into GS sets the index and base. */
                load_gs_index(next->gsindex);
-
-               /* This works (and fails) the same way as fsindex above. */
-               if (gsindex)
-                       prev->gs = 0;
+       } else {
+               if (next->gsbase) {
+                       /* Next index is zero but next base is nonzero. */
+                       if (prev_gsindex)
+                               load_gs_index(0);
+                       wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
+               } else {
+                       /* Next base and index are both zero. */
+                       if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+                               /*
+                                * We don't know the previous base and can't
+                                * find out without RDMSR.  Forcibly clear it.
+                                *
+                                * This contains a pointless SWAPGS pair.
+                                * Fixing it would involve an explicit check
+                                * for Xen or a new pvop.
+                                */
+                               load_gs_index(__USER_DS);
+                               load_gs_index(0);
+                       } else {
+                               /*
+                                * If the previous index is zero and ARCH_SET_GS
+                                * didn't change the base, then the base is
+                                * also zero and we don't need to do anything.
+                                */
+                               if (prev->gsbase || prev_gsindex)
+                                       load_gs_index(0);
+                       }
+               }
        }
-       if (next->gs)
-               wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
-       prev->gsindex = gsindex;
+       /*
+        * Save the old state and preserve the invariant.
+        * NB: if prev_gsindex == 0, then we can't reliably learn the base
+        * without RDMSR because Intel user code can zero it without telling
+        * us and AMD user code can program any 32-bit value without telling
+        * us.
+        */
+       if (prev_gsindex)
+               prev->gsbase = 0;
+       prev->gsindex = prev_gsindex;
 
        switch_fpu_finish(next_fpu, fpu_switch);
 
@@ -516,23 +535,11 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
-               /* handle small bases via the GDT because that's faster to
-                  switch. */
-               if (addr <= 0xffffffff) {
-                       set_32bit_tls(task, GS_TLS, addr);
-                       if (doit) {
-                               load_TLS(&task->thread, cpu);
-                               load_gs_index(GS_TLS_SEL);
-                       }
-                       task->thread.gsindex = GS_TLS_SEL;
-                       task->thread.gs = 0;
-               } else {
-                       task->thread.gsindex = 0;
-                       task->thread.gs = addr;
-                       if (doit) {
-                               load_gs_index(0);
-                               ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
-                       }
+               task->thread.gsindex = 0;
+               task->thread.gsbase = addr;
+               if (doit) {
+                       load_gs_index(0);
+                       ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
                }
                put_cpu();
                break;
@@ -542,52 +549,30 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
-               /* handle small bases via the GDT because that's faster to
-                  switch. */
-               if (addr <= 0xffffffff) {
-                       set_32bit_tls(task, FS_TLS, addr);
-                       if (doit) {
-                               load_TLS(&task->thread, cpu);
-                               loadsegment(fs, FS_TLS_SEL);
-                       }
-                       task->thread.fsindex = FS_TLS_SEL;
-                       task->thread.fs = 0;
-               } else {
-                       task->thread.fsindex = 0;
-                       task->thread.fs = addr;
-                       if (doit) {
-                               /* set the selector to 0 to not confuse
-                                  __switch_to */
-                               loadsegment(fs, 0);
-                               ret = wrmsrl_safe(MSR_FS_BASE, addr);
-                       }
+               task->thread.fsindex = 0;
+               task->thread.fsbase = addr;
+               if (doit) {
+                       /* set the selector to 0 to not confuse __switch_to */
+                       loadsegment(fs, 0);
+                       ret = wrmsrl_safe(MSR_FS_BASE, addr);
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
-               if (task->thread.fsindex == FS_TLS_SEL)
-                       base = read_32bit_tls(task, FS_TLS);
-               else if (doit)
+               if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
-                       base = task->thread.fs;
+                       base = task->thread.fsbase;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
-               unsigned gsindex;
-               if (task->thread.gsindex == GS_TLS_SEL)
-                       base = read_32bit_tls(task, GS_TLS);
-               else if (doit) {
-                       savesegment(gs, gsindex);
-                       if (gsindex)
-                               rdmsrl(MSR_KERNEL_GS_BASE, base);
-                       else
-                               base = task->thread.gs;
-               } else
-                       base = task->thread.gs;
+               if (doit)
+                       rdmsrl(MSR_KERNEL_GS_BASE, base);
+               else
+                       base = task->thread.gsbase;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
index 32e9d9cbb884ae10f0f5035c47bafb69f4517f80..e60ef918f53d52c765cd19821c5810adcda61820 100644 (file)
@@ -303,29 +303,11 @@ static int set_segment_reg(struct task_struct *task,
 
        switch (offset) {
        case offsetof(struct user_regs_struct,fs):
-               /*
-                * If this is setting fs as for normal 64-bit use but
-                * setting fs_base has implicitly changed it, leave it.
-                */
-               if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
-                    task->thread.fs != 0) ||
-                   (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
-                    task->thread.fs == 0))
-                       break;
                task->thread.fsindex = value;
                if (task == current)
                        loadsegment(fs, task->thread.fsindex);
                break;
        case offsetof(struct user_regs_struct,gs):
-               /*
-                * If this is setting gs as for normal 64-bit use but
-                * setting gs_base has implicitly changed it, leave it.
-                */
-               if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
-                    task->thread.gs != 0) ||
-                   (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
-                    task->thread.gs == 0))
-                       break;
                task->thread.gsindex = value;
                if (task == current)
                        load_gs_index(task->thread.gsindex);
@@ -417,7 +399,7 @@ static int putreg(struct task_struct *child,
                 * to set either thread.fs or thread.fsindex and the
                 * corresponding GDT slot.
                 */
-               if (child->thread.fs != value)
+               if (child->thread.fsbase != value)
                        return do_arch_prctl(child, ARCH_SET_FS, value);
                return 0;
        case offsetof(struct user_regs_struct,gs_base):
@@ -426,7 +408,7 @@ static int putreg(struct task_struct *child,
                 */
                if (value >= TASK_SIZE_OF(child))
                        return -EIO;
-               if (child->thread.gs != value)
+               if (child->thread.gsbase != value)
                        return do_arch_prctl(child, ARCH_SET_GS, value);
                return 0;
 #endif
@@ -453,31 +435,17 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset)
 #ifdef CONFIG_X86_64
        case offsetof(struct user_regs_struct, fs_base): {
                /*
-                * do_arch_prctl may have used a GDT slot instead of
-                * the MSR.  To userland, it appears the same either
-                * way, except the %fs segment selector might not be 0.
+                * XXX: This will not behave as expected if called on
+                * current or if fsindex != 0.
                 */
-               unsigned int seg = task->thread.fsindex;
-               if (task->thread.fs != 0)
-                       return task->thread.fs;
-               if (task == current)
-                       asm("movl %%fs,%0" : "=r" (seg));
-               if (seg != FS_TLS_SEL)
-                       return 0;
-               return get_desc_base(&task->thread.tls_array[FS_TLS]);
+               return task->thread.fsbase;
        }
        case offsetof(struct user_regs_struct, gs_base): {
                /*
-                * Exactly the same here as the %fs handling above.
+                * XXX: This will not behave as expected if called on
+                * current or if gsindex != 0.
                 */
-               unsigned int seg = task->thread.gsindex;
-               if (task->thread.gs != 0)
-                       return task->thread.gs;
-               if (task == current)
-                       asm("movl %%gs,%0" : "=r" (seg));
-               if (seg != GS_TLS_SEL)
-                       return 0;
-               return get_desc_base(&task->thread.tls_array[GS_TLS]);
+               return task->thread.gsbase;
        }
 #endif
        }
@@ -1266,7 +1234,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                        compat_ulong_t caddr, compat_ulong_t cdata)
 {
 #ifdef CONFIG_X86_X32_ABI
-       if (!is_ia32_task())
+       if (!in_ia32_syscall())
                return x32_arch_ptrace(child, request, caddr, cdata);
 #endif
 #ifdef CONFIG_IA32_EMULATION
index 3e84ef16f65799459cd55e0c1bce50ae43a9195e..22cc2f9f8aec4b1ad35d2efe547d3ae1b87c2932 100644 (file)
@@ -390,7 +390,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
                put_user_ex(&frame->uc, &frame->puc);
 
                /* Create the ucontext.  */
-               if (cpu_has_xsave)
+               if (boot_cpu_has(X86_FEATURE_XSAVE))
                        put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
                else
                        put_user_ex(0, &frame->uc.uc_flags);
@@ -441,7 +441,7 @@ static unsigned long frame_uc_flags(struct pt_regs *regs)
 {
        unsigned long flags;
 
-       if (cpu_has_xsave)
+       if (boot_cpu_has(X86_FEATURE_XSAVE))
                flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
        else
                flags = UC_SIGCONTEXT_SS;
@@ -761,7 +761,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_64
-       if (is_ia32_task())
+       if (in_ia32_syscall())
                return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
index 0e4329ed91ef61da5a86e5d604d1ad96109efbfb..fafe8b923cac2d27da4189e6be9a2761bfcd6d01 100644 (file)
@@ -1236,7 +1236,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
         * If we couldn't find a local APIC, then get out of here now!
         */
        if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
-           !cpu_has_apic) {
+           !boot_cpu_has(X86_FEATURE_APIC)) {
                if (!disable_apic) {
                        pr_err("BIOS bug, local APIC #%d not detected!...\n",
                                boot_cpu_physical_apicid);
index ab40954e113e952b088818e60b569cabc96488f5..f386bad0984ed70f89a432d89b6d90f28bb4eaee 100644 (file)
@@ -40,7 +40,7 @@
 static inline void flush_tce(void* tceaddr)
 {
        /* a single tce can't cross a cache line */
-       if (cpu_has_clflush)
+       if (boot_cpu_has(X86_FEATURE_CLFLUSH))
                clflush(tceaddr);
        else
                wbinvd();
index 7fc5e843f247b358288b23e459eebfefcf6631f0..9692a5e9fdab2002f31c6dd2dce2c113647608b6 100644 (file)
@@ -114,6 +114,7 @@ int do_set_thread_area(struct task_struct *p, int idx,
                       int can_allocate)
 {
        struct user_desc info;
+       unsigned short __maybe_unused sel, modified_sel;
 
        if (copy_from_user(&info, u_info, sizeof(info)))
                return -EFAULT;
@@ -141,6 +142,47 @@ int do_set_thread_area(struct task_struct *p, int idx,
 
        set_tls_desc(p, idx, &info, 1);
 
+       /*
+        * If DS, ES, FS, or GS points to the modified segment, forcibly
+        * refresh it.  Only needed on x86_64 because x86_32 reloads them
+        * on return to user mode.
+        */
+       modified_sel = (idx << 3) | 3;
+
+       if (p == current) {
+#ifdef CONFIG_X86_64
+               savesegment(ds, sel);
+               if (sel == modified_sel)
+                       loadsegment(ds, sel);
+
+               savesegment(es, sel);
+               if (sel == modified_sel)
+                       loadsegment(es, sel);
+
+               savesegment(fs, sel);
+               if (sel == modified_sel)
+                       loadsegment(fs, sel);
+
+               savesegment(gs, sel);
+               if (sel == modified_sel)
+                       load_gs_index(sel);
+#endif
+
+#ifdef CONFIG_X86_32_LAZY_GS
+               savesegment(gs, sel);
+               if (sel == modified_sel)
+                       loadsegment(gs, sel);
+#endif
+       } else {
+#ifdef CONFIG_X86_64
+               if (p->thread.fsindex == modified_sel)
+                       p->thread.fsbase = info.base_addr;
+
+               if (p->thread.gsindex == modified_sel)
+                       p->thread.gsbase = info.base_addr;
+#endif
+       }
+
        return 0;
 }
 
index 06cbe25861f1591a7829b5d9347e1a753f874456..d1590486204a1bb52974c27793dc66139f20c4ee 100644 (file)
@@ -51,6 +51,7 @@
 #include <asm/processor.h>
 #include <asm/debugreg.h>
 #include <linux/atomic.h>
+#include <asm/text-patching.h>
 #include <asm/ftrace.h>
 #include <asm/traps.h>
 #include <asm/desc.h>
index c9c4c7ce3eb23c8fea3f6b05647c482edf81c939..38ba6de56edec93badec52707045b0db3a189e0a 100644 (file)
@@ -36,7 +36,7 @@ static int __read_mostly tsc_unstable;
 
 /* native_sched_clock() is called before tsc_init(), so
    we must start with the TSC soft disabled to prevent
-   erroneous rdtsc usage on !cpu_has_tsc processors */
+   erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */
 static int __read_mostly tsc_disabled = -1;
 
 static DEFINE_STATIC_KEY_FALSE(__use_tsc);
@@ -834,15 +834,15 @@ int recalibrate_cpu_khz(void)
 #ifndef CONFIG_SMP
        unsigned long cpu_khz_old = cpu_khz;
 
-       if (cpu_has_tsc) {
-               tsc_khz = x86_platform.calibrate_tsc();
-               cpu_khz = tsc_khz;
-               cpu_data(0).loops_per_jiffy =
-                       cpufreq_scale(cpu_data(0).loops_per_jiffy,
-                                       cpu_khz_old, cpu_khz);
-               return 0;
-       } else
+       if (!boot_cpu_has(X86_FEATURE_TSC))
                return -ENODEV;
+
+       tsc_khz = x86_platform.calibrate_tsc();
+       cpu_khz = tsc_khz;
+       cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
+                                                   cpu_khz_old, cpu_khz);
+
+       return 0;
 #else
        return -ENODEV;
 #endif
@@ -922,9 +922,6 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
        struct cpufreq_freqs *freq = data;
        unsigned long *lpj;
 
-       if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
-               return 0;
-
        lpj = &boot_cpu_data.loops_per_jiffy;
 #ifdef CONFIG_SMP
        if (!(freq->flags & CPUFREQ_CONST_LOOPS))
@@ -954,9 +951,9 @@ static struct notifier_block time_cpufreq_notifier_block = {
        .notifier_call  = time_cpufreq_notifier
 };
 
-static int __init cpufreq_tsc(void)
+static int __init cpufreq_register_tsc_scaling(void)
 {
-       if (!cpu_has_tsc)
+       if (!boot_cpu_has(X86_FEATURE_TSC))
                return 0;
        if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                return 0;
@@ -965,7 +962,7 @@ static int __init cpufreq_tsc(void)
        return 0;
 }
 
-core_initcall(cpufreq_tsc);
+core_initcall(cpufreq_register_tsc_scaling);
 
 #endif /* CONFIG_CPU_FREQ */
 
@@ -1081,7 +1078,7 @@ static void __init check_system_tsc_reliable(void)
  */
 int unsynchronized_tsc(void)
 {
-       if (!cpu_has_tsc || tsc_unstable)
+       if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
                return 1;
 
 #ifdef CONFIG_SMP
@@ -1205,7 +1202,7 @@ out:
 
 static int __init init_tsc_clocksource(void)
 {
-       if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
+       if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
                return 0;
 
        if (tsc_clocksource_reliable)
@@ -1242,7 +1239,7 @@ void __init tsc_init(void)
        u64 lpj;
        int cpu;
 
-       if (!cpu_has_tsc) {
+       if (!boot_cpu_has(X86_FEATURE_TSC)) {
                setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
                return;
        }
index bd074151bfd641cf93afa6061e9452fd2aefbd1d..6c1ff31d99ffeb0d0a28c5ee472bb1865ff23df3 100644 (file)
@@ -516,7 +516,7 @@ struct uprobe_xol_ops {
 
 static inline int sizeof_long(void)
 {
-       return is_ia32_task() ? 4 : 8;
+       return in_ia32_syscall() ? 4 : 8;
 }
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
index bbbaa802d13efc8b1e57f7defa351cacb2aaab78..769af907f82485edc91bf3e4bbfcebddbe148fcc 100644 (file)
@@ -75,7 +75,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
                return 0;
 
        /* Update OSXSAVE bit */
-       if (cpu_has_xsave && best->function == 0x1) {
+       if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1) {
                best->ecx &= ~F(OSXSAVE);
                if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
                        best->ecx |= F(OSXSAVE);
index b6f50e8b0a393675009a5dcaad7f30af315bc91d..38c0c32926c96bc154c2ce6c7c6cb06a30b03ac2 100644 (file)
@@ -3844,7 +3844,8 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
                __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
                                        boot_cpu_data.x86_phys_bits,
                                        context->shadow_root_level, false,
-                                       cpu_has_gbpages, true, true);
+                                       boot_cpu_has(X86_FEATURE_GBPAGES),
+                                       true, true);
        else
                __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
                                            boot_cpu_data.x86_phys_bits,
index 31346a3f20a5c8b5384e6fda2a81029059cdf621..fafd720ce10a12cbe6e70da6c3dc1796af3bd447 100644 (file)
@@ -1254,7 +1254,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
        kvm_load_ldt(svm->host.ldt);
 #ifdef CONFIG_X86_64
        loadsegment(fs, svm->host.fs);
-       wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
+       wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
        load_gs_index(svm->host.gs);
 #else
 #ifdef CONFIG_X86_32_LAZY_GS
index 2f1ea2f61e1fceef4a77b3955c4d3a9c4a936a72..b72743c5668d3d55387a55d06c0c886cf2b7b1b1 100644 (file)
@@ -809,8 +809,7 @@ TRACE_EVENT(kvm_write_tsc_offset,
 
 #define host_clocks                                    \
        {VCLOCK_NONE, "none"},                          \
-       {VCLOCK_TSC,  "tsc"},                           \
-       {VCLOCK_HPET, "hpet"}                           \
+       {VCLOCK_TSC,  "tsc"}                            \
 
 TRACE_EVENT(kvm_update_master_clock,
        TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),
index 133679d520afee3934bd5dc155d9675d69198a8d..cb47fe3da2926b3c1c17df41625bef9492353554 100644 (file)
@@ -3390,7 +3390,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                }
        }
 
-       if (cpu_has_xsaves)
+       if (boot_cpu_has(X86_FEATURE_XSAVES))
                rdmsrl(MSR_IA32_XSS, host_xss);
 
        return 0;
index 9b7798c7b210e75499644ed1ca35b643fe743208..12f33e6623826dfcd0af660a534e8240683bc1a2 100644 (file)
@@ -2611,7 +2611,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = KVM_MAX_MCE_BANKS;
                break;
        case KVM_CAP_XCRS:
-               r = cpu_has_xsave;
+               r = boot_cpu_has(X86_FEATURE_XSAVE);
                break;
        case KVM_CAP_TSC_CONTROL:
                r = kvm_has_tsc_control;
@@ -3094,7 +3094,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 
        /* Set XSTATE_BV and possibly XCOMP_BV.  */
        xsave->header.xfeatures = xstate_bv;
-       if (cpu_has_xsaves)
+       if (boot_cpu_has(X86_FEATURE_XSAVES))
                xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
        /*
@@ -3121,7 +3121,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
                                         struct kvm_xsave *guest_xsave)
 {
-       if (cpu_has_xsave) {
+       if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                memset(guest_xsave, 0, sizeof(struct kvm_xsave));
                fill_xsave((u8 *) guest_xsave->region, vcpu);
        } else {
@@ -3139,7 +3139,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
        u64 xstate_bv =
                *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
 
-       if (cpu_has_xsave) {
+       if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                /*
                 * Here we allow setting states that are not present in
                 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
@@ -3160,7 +3160,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
                                        struct kvm_xcrs *guest_xcrs)
 {
-       if (!cpu_has_xsave) {
+       if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
                guest_xcrs->nr_xcrs = 0;
                return;
        }
@@ -3176,7 +3176,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
 {
        int i, r = 0;
 
-       if (!cpu_has_xsave)
+       if (!boot_cpu_has(X86_FEATURE_XSAVE))
                return -EINVAL;
 
        if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
@@ -5865,7 +5865,7 @@ int kvm_arch_init(void *opaque)
 
        perf_register_guest_info_callbacks(&kvm_guest_cbs);
 
-       if (cpu_has_xsave)
+       if (boot_cpu_has(X86_FEATURE_XSAVE))
                host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
        kvm_lapic_init();
@@ -7293,7 +7293,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 static void fx_init(struct kvm_vcpu *vcpu)
 {
        fpstate_init(&vcpu->arch.guest_fpu.state);
-       if (cpu_has_xsaves)
+       if (boot_cpu_has(X86_FEATURE_XSAVES))
                vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
                        host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
index 91d93b95bd8685228b395c10e77d30e3a4303355..b559d923878133aadb4480c61e642bbf6c799086 100644 (file)
@@ -612,7 +612,7 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
 {
        stac();
 #ifdef CONFIG_X86_INTEL_USERCOPY
-       if (n > 64 && cpu_has_xmm2)
+       if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
                n = __copy_user_zeroing_intel_nocache(to, from, n);
        else
                __copy_user_zeroing(to, from, n);
@@ -629,7 +629,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
 {
        stac();
 #ifdef CONFIG_X86_INTEL_USERCOPY
-       if (n > 64 && cpu_has_xmm2)
+       if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
                n = __copy_user_intel_nocache(to, from, n);
        else
                __copy_user(to, from, n);
index 82447b3fba380d6547619958c07ed7c3d2d3010b..4bb53b89f3c55defd9098d81002eed1877b0fa07 100644 (file)
@@ -1,5 +1,6 @@
 #include <linux/module.h>
 #include <asm/uaccess.h>
+#include <asm/traps.h>
 
 typedef bool (*ex_handler_t)(const struct exception_table_entry *,
                            struct pt_regs *, int);
@@ -42,6 +43,43 @@ bool ex_handler_ext(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL(ex_handler_ext);
 
+bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
+                            struct pt_regs *regs, int trapnr)
+{
+       WARN_ONCE(1, "unchecked MSR access error: RDMSR from 0x%x\n",
+                 (unsigned int)regs->cx);
+
+       /* Pretend that the read succeeded and returned 0. */
+       regs->ip = ex_fixup_addr(fixup);
+       regs->ax = 0;
+       regs->dx = 0;
+       return true;
+}
+EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
+
+bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
+                            struct pt_regs *regs, int trapnr)
+{
+       WARN_ONCE(1, "unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n",
+                 (unsigned int)regs->cx,
+                 (unsigned int)regs->dx, (unsigned int)regs->ax);
+
+       /* Pretend that the write succeeded. */
+       regs->ip = ex_fixup_addr(fixup);
+       return true;
+}
+EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
+
+bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
+                        struct pt_regs *regs, int trapnr)
+{
+       if (static_cpu_has(X86_BUG_NULL_SEG))
+               asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
+       asm volatile ("mov %0, %%fs" : : "rm" (0));
+       return ex_handler_default(fixup, regs, trapnr);
+}
+EXPORT_SYMBOL(ex_handler_clear_fs);
+
 bool ex_has_fault_handler(unsigned long ip)
 {
        const struct exception_table_entry *e;
@@ -82,24 +120,46 @@ int fixup_exception(struct pt_regs *regs, int trapnr)
        return handler(e, regs, trapnr);
 }
 
+extern unsigned int early_recursion_flag;
+
 /* Restricted version used during very early boot */
-int __init early_fixup_exception(unsigned long *ip)
+void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 {
-       const struct exception_table_entry *e;
-       unsigned long new_ip;
-       ex_handler_t handler;
-
-       e = search_exception_tables(*ip);
-       if (!e)
-               return 0;
-
-       new_ip  = ex_fixup_addr(e);
-       handler = ex_fixup_handler(e);
-
-       /* special handling not supported during early boot */
-       if (handler != ex_handler_default)
-               return 0;
-
-       *ip = new_ip;
-       return 1;
+       /* Ignore early NMIs. */
+       if (trapnr == X86_TRAP_NMI)
+               return;
+
+       if (early_recursion_flag > 2)
+               goto halt_loop;
+
+       if (regs->cs != __KERNEL_CS)
+               goto fail;
+
+       /*
+        * The full exception fixup machinery is available as soon as
+        * the early IDT is loaded.  This means that it is the
+        * responsibility of extable users to either function correctly
+        * when handlers are invoked early or to simply avoid causing
+        * exceptions before they're ready to handle them.
+        *
+        * This is better than filtering which handlers can be used,
+        * because refusing to call a handler here is guaranteed to
+        * result in a hard-to-debug panic.
+        *
+        * Keep in mind that not all vectors actually get here.  Early
+        * page faults, for example, are special.
+        */
+       if (fixup_exception(regs, trapnr))
+               return;
+
+fail:
+       early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
+                    (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
+                    regs->orig_ax, read_cr2());
+
+       show_regs(regs);
+
+halt_loop:
+       while (true)
+               halt();
 }
index 740d7ac03a552bc4937edfc8ff7e8b9d044a61b6..14a95054d4e058a85f6b8d80c162aa2617e7f848 100644 (file)
@@ -162,7 +162,7 @@ static __init int setup_hugepagesz(char *opt)
        unsigned long ps = memparse(opt, &opt);
        if (ps == PMD_SIZE) {
                hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
-       } else if (ps == PUD_SIZE && cpu_has_gbpages) {
+       } else if (ps == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES)) {
                hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
        } else {
                printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
@@ -177,7 +177,7 @@ __setup("hugepagesz=", setup_hugepagesz);
 static __init int gigantic_pages_init(void)
 {
        /* With compaction or CMA we can allocate gigantic pages at runtime */
-       if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT))
+       if (boot_cpu_has(X86_FEATURE_GBPAGES) && !size_to_hstate(1UL << PUD_SHIFT))
                hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
        return 0;
 }
index 9d56f271d519592a5fbf316237f73b57b0fc49c4..372aad2b32910d30eb3f062e67a7589340847d32 100644 (file)
@@ -157,23 +157,23 @@ static void __init probe_page_size_mask(void)
         * This will simplify cpa(), which otherwise needs to support splitting
         * large pages into small in interrupt context, etc.
         */
-       if (cpu_has_pse && !debug_pagealloc_enabled())
+       if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
                page_size_mask |= 1 << PG_LEVEL_2M;
 #endif
 
        /* Enable PSE if available */
-       if (cpu_has_pse)
+       if (boot_cpu_has(X86_FEATURE_PSE))
                cr4_set_bits_and_update_boot(X86_CR4_PSE);
 
        /* Enable PGE if available */
-       if (cpu_has_pge) {
+       if (boot_cpu_has(X86_FEATURE_PGE)) {
                cr4_set_bits_and_update_boot(X86_CR4_PGE);
                __supported_pte_mask |= _PAGE_GLOBAL;
        } else
                __supported_pte_mask &= ~_PAGE_GLOBAL;
 
        /* Enable 1 GB linear kernel mappings if available: */
-       if (direct_gbpages && cpu_has_gbpages) {
+       if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
                printk(KERN_INFO "Using GB pages for direct mapping\n");
                page_size_mask |= 1 << PG_LEVEL_1G;
        } else {
index bd7a9b9e2e14a595adfb1c86bbcfacb787a6579c..85af914e3d27582bd29449fae698a4f0bfd4a85e 100644 (file)
@@ -284,7 +284,7 @@ kernel_physical_mapping_init(unsigned long start,
         */
        mapping_iter = 1;
 
-       if (!cpu_has_pse)
+       if (!boot_cpu_has(X86_FEATURE_PSE))
                use_pse = 0;
 
 repeat:
index 214afda979114f3cf1ad7e2d95173e0446a84ed3..89d97477c1d926ada477ff41516f3bcda8c4b76e 100644 (file)
@@ -1295,7 +1295,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
        struct vmem_altmap *altmap = to_vmem_altmap(start);
        int err;
 
-       if (cpu_has_pse)
+       if (boot_cpu_has(X86_FEATURE_PSE))
                err = vmemmap_populate_hugepages(start, end, node, altmap);
        else if (altmap) {
                pr_err_once("%s: no cpu support for altmap allocations\n",
@@ -1338,7 +1338,7 @@ void register_page_bootmem_memmap(unsigned long section_nr,
                }
                get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
 
-               if (!cpu_has_pse) {
+               if (!boot_cpu_has(X86_FEATURE_PSE)) {
                        next = (addr + PAGE_SIZE) & PAGE_MASK;
                        pmd = pmd_offset(pud, addr);
                        if (pmd_none(*pmd))
index 0d8d53d1f5cc29c2376e8d72204896b0be6b4878..f0894910bdd731c0e2adc3fd70d94c5f90f62f2a 100644 (file)
@@ -378,7 +378,7 @@ EXPORT_SYMBOL(iounmap);
 int __init arch_ioremap_pud_supported(void)
 {
 #ifdef CONFIG_X86_64
-       return cpu_has_gbpages;
+       return boot_cpu_has(X86_FEATURE_GBPAGES);
 #else
        return 0;
 #endif
@@ -386,7 +386,7 @@ int __init arch_ioremap_pud_supported(void)
 
 int __init arch_ioremap_pmd_supported(void)
 {
-       return cpu_has_pse;
+       return boot_cpu_has(X86_FEATURE_PSE);
 }
 
 /*
index a1f0e1d0ddc2453a07f86c53ec9791cc9164c6ba..7a1f7bbf4105b6ec570c9c15497a00237c5185e9 100644 (file)
@@ -1055,7 +1055,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
        /*
         * Map everything starting from the Gb boundary, possibly with 1G pages
         */
-       while (cpu_has_gbpages && end - start >= PUD_SIZE) {
+       while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) {
                set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE |
                                   massage_pgprot(pud_pgprot)));
 
@@ -1466,7 +1466,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
         * error case we fall back to cpa_flush_all (which uses
         * WBINVD):
         */
-       if (!ret && cpu_has_clflush) {
+       if (!ret && boot_cpu_has(X86_FEATURE_CLFLUSH)) {
                if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
                        cpa_flush_array(addr, numpages, cache,
                                        cpa.flags, pages);
index faec01e7a17d21fbd7abafc42fb9855dc7d530f4..fb0604f11eec268a2cc69b1d47e3885b073c6cf1 100644 (file)
 static bool boot_cpu_done;
 
 static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
+static void init_cache_modes(void);
 
-static inline void pat_disable(const char *reason)
+void pat_disable(const char *reason)
 {
+       if (!__pat_enabled)
+               return;
+
+       if (boot_cpu_done) {
+               WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
+               return;
+       }
+
        __pat_enabled = 0;
        pr_info("x86/PAT: %s\n", reason);
+
+       init_cache_modes();
 }
 
 static int __init nopat(char *str)
@@ -181,7 +192,7 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
  * configuration.
  * Using lower indices is preferred, so we start with highest index.
  */
-void pat_init_cache_modes(u64 pat)
+static void __init_cache_modes(u64 pat)
 {
        enum page_cache_mode cache;
        char pat_msg[33];
@@ -202,14 +213,11 @@ static void pat_bsp_init(u64 pat)
 {
        u64 tmp_pat;
 
-       if (!cpu_has_pat) {
+       if (!boot_cpu_has(X86_FEATURE_PAT)) {
                pat_disable("PAT not supported by CPU.");
                return;
        }
 
-       if (!pat_enabled())
-               goto done;
-
        rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
        if (!tmp_pat) {
                pat_disable("PAT MSR is 0, disabled.");
@@ -218,16 +226,12 @@ static void pat_bsp_init(u64 pat)
 
        wrmsrl(MSR_IA32_CR_PAT, pat);
 
-done:
-       pat_init_cache_modes(pat);
+       __init_cache_modes(pat);
 }
 
 static void pat_ap_init(u64 pat)
 {
-       if (!pat_enabled())
-               return;
-
-       if (!cpu_has_pat) {
+       if (!boot_cpu_has(X86_FEATURE_PAT)) {
                /*
                 * If this happens we are on a secondary CPU, but switched to
                 * PAT on the boot CPU. We have no way to undo PAT.
@@ -238,18 +242,32 @@ static void pat_ap_init(u64 pat)
        wrmsrl(MSR_IA32_CR_PAT, pat);
 }
 
-void pat_init(void)
+static void init_cache_modes(void)
 {
-       u64 pat;
-       struct cpuinfo_x86 *c = &boot_cpu_data;
+       u64 pat = 0;
+       static int init_cm_done;
 
-       if (!pat_enabled()) {
+       if (init_cm_done)
+               return;
+
+       if (boot_cpu_has(X86_FEATURE_PAT)) {
+               /*
+                * CPU supports PAT. Set PAT table to be consistent with
+                * PAT MSR. This case supports "nopat" boot option, and
+                * virtual machine environments which support PAT without
+                * MTRRs. In particular, Xen has a unique PAT MSR setup.
+                *
+                * If PAT MSR returns 0, it is considered invalid and emulates
+                * as No PAT.
+                */
+               rdmsrl(MSR_IA32_CR_PAT, pat);
+       }
+
+       if (!pat) {
                /*
                 * No PAT. Emulate the PAT table that corresponds to the two
-                * cache bits, PWT (Write Through) and PCD (Cache Disable). This
-                * setup is the same as the BIOS default setup when the system
-                * has PAT but the "nopat" boot option has been specified. This
-                * emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
+                * cache bits, PWT (Write Through) and PCD (Cache Disable).
+                * This setup is also the same as the BIOS default setup.
                 *
                 * PTE encoding:
                 *
@@ -266,10 +284,36 @@ void pat_init(void)
                 */
                pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
                      PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+       }
+
+       __init_cache_modes(pat);
+
+       init_cm_done = 1;
+}
+
+/**
+ * pat_init - Initialize PAT MSR and PAT table
+ *
+ * This function initializes PAT MSR and PAT table with an OS-defined value
+ * to enable additional cache attributes, WC and WT.
+ *
+ * This function must be called on all CPUs using the specific sequence of
+ * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
+ * procedure for PAT.
+ */
+void pat_init(void)
+{
+       u64 pat;
+       struct cpuinfo_x86 *c = &boot_cpu_data;
+
+       if (!pat_enabled()) {
+               init_cache_modes();
+               return;
+       }
 
-       } else if ((c->x86_vendor == X86_VENDOR_INTEL) &&
-                  (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
-                   ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+       if ((c->x86_vendor == X86_VENDOR_INTEL) &&
+           (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
+            ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
                /*
                 * PAT support with the lower four entries. Intel Pentium 2,
                 * 3, M, and 4 are affected by PAT errata, which makes the
@@ -734,25 +778,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
        if (file->f_flags & O_DSYNC)
                pcm = _PAGE_CACHE_MODE_UC_MINUS;
 
-#ifdef CONFIG_X86_32
-       /*
-        * On the PPro and successors, the MTRRs are used to set
-        * memory types for physical addresses outside main memory,
-        * so blindly setting UC or PWT on those pages is wrong.
-        * For Pentiums and earlier, the surround logic should disable
-        * caching for the high addresses through the KEN pin, but
-        * we maintain the tradition of paranoia in this code.
-        */
-       if (!pat_enabled() &&
-           !(boot_cpu_has(X86_FEATURE_MTRR) ||
-             boot_cpu_has(X86_FEATURE_K6_MTRR) ||
-             boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
-             boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
-           (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
-               pcm = _PAGE_CACHE_MODE_UC;
-       }
-#endif
-
        *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
                             cachemode2protval(pcm));
        return 1;
index 0e07e0968c3a0d5959d554c0c29ab358eb78b82b..28c04123b6ddaebce73e967644cf589a0b524ac2 100644 (file)
@@ -636,7 +636,7 @@ static int __init ppro_init(char **cpu_type)
        __u8 cpu_model = boot_cpu_data.x86_model;
        struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
 
-       if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
+       if (force_cpu_type == arch_perfmon && boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
                return 0;
 
        /*
@@ -700,7 +700,7 @@ int __init op_nmi_init(struct oprofile_operations *ops)
        char *cpu_type = NULL;
        int ret = 0;
 
-       if (!cpu_has_apic)
+       if (!boot_cpu_has(X86_FEATURE_APIC))
                return -ENODEV;
 
        if (force_cpu_type == timer)
@@ -761,7 +761,7 @@ int __init op_nmi_init(struct oprofile_operations *ops)
                if (cpu_type)
                        break;
 
-               if (!cpu_has_arch_perfmon)
+               if (!boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
                        return -ENODEV;
 
                /* use arch perfmon as fallback */
index d90528ea541206b57f3048e191d4340ee070b40b..350f7096baac82893bc076fd6db4d04a685d7104 100644 (file)
@@ -75,7 +75,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
        u64 val;
        int i;
 
-       if (cpu_has_arch_perfmon) {
+       if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
                union cpuid10_eax eax;
                eax.full = cpuid_eax(0xa);
 
index beac4dfdade6c05c02591e924fd913d5db53abcd..4bd08b0fc8ea1b1c9badf0128920858ef8d41336 100644 (file)
@@ -445,7 +445,7 @@ void __init xen_msi_init(void)
                uint32_t eax = cpuid_eax(xen_cpuid_base() + 4);
 
                if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) ||
-                   ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && cpu_has_apic))
+                   ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && boot_cpu_has(X86_FEATURE_APIC)))
                        return;
        }
 
index 291226b952a997f55d3a0be723f15cb9b9b1ab92..9f14bd34581d663a22cb326d0ad3f98b7c2822d6 100644 (file)
@@ -106,7 +106,7 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
                         * normal page tables.
                         * NOTE: We can mark everything as executable here
                         */
-                       if (cpu_has_pse) {
+                       if (boot_cpu_has(X86_FEATURE_PSE)) {
                                set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
                                pfn += PTRS_PER_PTE;
                        } else {
index 880862c7d9ddba51e1b6964bc80dcf49d6a8b6ff..6ab672233ac9861d35d5ee53aebcf84286f46f0c 100644 (file)
@@ -75,7 +75,6 @@
 #include <asm/mach_traps.h>
 #include <asm/mwait.h>
 #include <asm/pci_x86.h>
-#include <asm/pat.h>
 #include <asm/cpu.h>
 
 #ifdef CONFIG_ACPI
@@ -1093,6 +1092,26 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
        return ret;
 }
 
+static u64 xen_read_msr(unsigned int msr)
+{
+       /*
+        * This will silently swallow a #GP from RDMSR.  It may be worth
+        * changing that.
+        */
+       int err;
+
+       return xen_read_msr_safe(msr, &err);
+}
+
+static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
+{
+       /*
+        * This will silently swallow a #GP from WRMSR.  It may be worth
+        * changing that.
+        */
+       xen_write_msr_safe(msr, low, high);
+}
+
 void xen_setup_shared_info(void)
 {
        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1223,8 +1242,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 
        .wbinvd = native_wbinvd,
 
-       .read_msr = xen_read_msr_safe,
-       .write_msr = xen_write_msr_safe,
+       .read_msr = xen_read_msr,
+       .write_msr = xen_write_msr,
+
+       .read_msr_safe = xen_read_msr_safe,
+       .write_msr_safe = xen_write_msr_safe,
 
        .read_pmc = xen_read_pmc,
 
@@ -1469,10 +1491,10 @@ static void xen_pvh_set_cr_flags(int cpu)
         * For BSP, PSE PGE are set in probe_page_size_mask(), for APs
         * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu().
        */
-       if (cpu_has_pse)
+       if (boot_cpu_has(X86_FEATURE_PSE))
                cr4_set_bits_and_update_boot(X86_CR4_PSE);
 
-       if (cpu_has_pge)
+       if (boot_cpu_has(X86_FEATURE_PGE))
                cr4_set_bits_and_update_boot(X86_CR4_PGE);
 }
 
@@ -1511,7 +1533,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 {
        struct physdev_set_iopl set_iopl;
        unsigned long initrd_start = 0;
-       u64 pat;
        int rc;
 
        if (!xen_start_info)
@@ -1618,13 +1639,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
                                   xen_start_info->nr_pages);
        xen_reserve_special_pages();
 
-       /*
-        * Modify the cache mode translation tables to match Xen's PAT
-        * configuration.
-        */
-       rdmsrl(MSR_IA32_CR_PAT, pat);
-       pat_init_cache_modes(pat);
-
        /* keep using Xen gdt for now; no urgent need to change it */
 
 #ifdef CONFIG_X86_32
index 0f6b229afcb9e621eb116f6ce805a619ddfe96b3..247bfa8eaddbf3659daec2949561129e4a1a4db1 100644 (file)
@@ -945,7 +945,7 @@ static int __init longhaul_init(void)
        }
 #endif
 #ifdef CONFIG_X86_IO_APIC
-       if (cpu_has_apic) {
+       if (boot_cpu_has(X86_FEATURE_APIC)) {
                printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
                                "broken in this configuration.\n");
                return -ENODEV;
index 6743ff7dccfa30b2997d2529d97747d5261d5ad9..059f7c39c582827c1ad923c3f2fb6a203a235b52 100644 (file)
@@ -72,7 +72,7 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages)
 {
 
 #if defined(CONFIG_X86)
-       if (cpu_has_clflush) {
+       if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
                drm_cache_flush_clflush(pages, num_pages);
                return;
        }
@@ -105,7 +105,7 @@ void
 drm_clflush_sg(struct sg_table *st)
 {
 #if defined(CONFIG_X86)
-       if (cpu_has_clflush) {
+       if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
                struct sg_page_iter sg_iter;
 
                mb();
@@ -129,7 +129,7 @@ void
 drm_clflush_virt_range(void *addr, unsigned long length)
 {
 #if defined(CONFIG_X86)
-       if (cpu_has_clflush) {
+       if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
                const int size = boot_cpu_data.x86_clflush_size;
                void *end = addr + length;
                addr = (void *)(((unsigned long)addr) & -size);
index dabc08987b5e20389fa6b57872902bd4706d71cc..f2cb9a9539ee066ef1ee2a6a5ecb666830cec0d9 100644 (file)
@@ -1732,7 +1732,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
        if (args->flags & ~(I915_MMAP_WC))
                return -EINVAL;
 
-       if (args->flags & I915_MMAP_WC && !cpu_has_pat)
+       if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
                return -ENODEV;
 
        obj = drm_gem_object_lookup(dev, file, args->handle);
index 1328bc5021b4cf7287021c81a4252f2cf60cdc2a..b845f468dd74f3b2500d3f1fb059662b6432664d 100644 (file)
@@ -488,7 +488,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                ret = relocate_entry_cpu(obj, reloc, target_offset);
        else if (obj->map_and_fenceable)
                ret = relocate_entry_gtt(obj, reloc, target_offset);
-       else if (cpu_has_clflush)
+       else if (static_cpu_has(X86_FEATURE_CLFLUSH))
                ret = relocate_entry_clflush(obj, reloc, target_offset);
        else {
                WARN_ONCE(1, "Impossible case in relocation handling\n");
index 6f8b084e13d0724e77c68c249a227fdb6377a02d..3d8ff09eba57696677b242d29e33fb1bac3f592d 100644 (file)
@@ -143,9 +143,9 @@ struct analog_port {
 
 #include <linux/i8253.h>
 
-#define GET_TIME(x)    do { if (cpu_has_tsc) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0)
-#define DELTA(x,y)     (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
-#define TIME_NAME      (cpu_has_tsc?"TSC":"PIT")
+#define GET_TIME(x)    do { if (boot_cpu_has(X86_FEATURE_TSC)) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0)
+#define DELTA(x,y)     (boot_cpu_has(X86_FEATURE_TSC) ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
+#define TIME_NAME      (boot_cpu_has(X86_FEATURE_TSC)?"TSC":"PIT")
 static unsigned int get_time_pit(void)
 {
         unsigned long flags;
index 8adaaeae32681d863c568eddbfd9af5e12bdc205..49721b4e1975c3c1b038665aa9f18749d95e06e6 100644 (file)
@@ -36,7 +36,7 @@ static void irq_remapping_disable_io_apic(void)
         * As this gets called during crash dump, keep this simple for
         * now.
         */
-       if (cpu_has_apic || apic_from_smp_config())
+       if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
                disconnect_bsp_APIC(0);
 }
 
index adc162c7040d7ef0a2f8f738e7a21bab1d57ba09..6e9042e3d2a944db17c37bf9f3fda675c1f0bdd6 100644 (file)
@@ -603,7 +603,7 @@ void __init lguest_arch_host_init(void)
         * doing this.
         */
        get_online_cpus();
-       if (cpu_has_pge) { /* We have a broader idea of "global". */
+       if (boot_cpu_has(X86_FEATURE_PGE)) { /* We have a broader idea of "global". */
                /* Remember that this was originally set (for cleanup). */
                cpu_had_pge = 1;
                /*
index 72c9f1f352b4ec686a073b48f8df52d6245f771f..7c7830722ea2cca599485754f60470be17920820 100644 (file)
@@ -635,10 +635,10 @@ static int receive(struct net_device *dev, int cnt)
 
 #ifdef __i386__
 #include <asm/msr.h>
-#define GETTICK(x)                                                \
-({                                                                \
-       if (cpu_has_tsc)                                          \
-               x = (unsigned int)rdtsc();                \
+#define GETTICK(x)                                             \
+({                                                             \
+       if (boot_cpu_has(X86_FEATURE_TSC))                      \
+               x = (unsigned int)rdtsc();                      \
 })
 #else /* __i386__ */
 #define GETTICK(x)
index 5fbda7b218c7a9ed06b2050ca276443a17b319d9..9cf4f8463c4e6b141e69632cd67f2b24965c9757 100644 (file)
@@ -2425,7 +2425,7 @@ static __init uint32_t visorutil_spar_detect(void)
 {
        unsigned int eax, ebx, ecx, edx;
 
-       if (cpu_has_hypervisor) {
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
                /* check the ID */
                cpuid(UNISYS_SPAR_LEAF_ID, &eax, &ebx, &ecx, &edx);
                return  (ebx == UNISYS_SPAR_ID_EBX) &&
index 339125bb4d2cf919669677c5d8edc900157bcc50..6a67ab94b553363934bc9c2e07eb12d6c8a977f7 100644 (file)
 
 #define INIT_TASK_DATA(align)                                          \
        . = ALIGN(align);                                               \
-       *(.data..init_task)
+       VMLINUX_SYMBOL(__start_init_task) = .;                          \
+       *(.data..init_task)                                             \
+       VMLINUX_SYMBOL(__end_init_task) = .;
 
 /*
  * Read only Data
index b47ebd1706907e2cd852b3c4c30a82e01ee8d654..c73425de3cfe731c5743e7b0d6aec78d601527b5 100644 (file)
@@ -9,6 +9,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
                        test_FCMOV test_FCOMI test_FISTTP \
                        vdso_restorer
+TARGETS_C_64BIT_ONLY := fsgsbase
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
 TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
new file mode 100644 (file)
index 0000000..5b2b4b3
--- /dev/null
@@ -0,0 +1,398 @@
+/*
+ * fsgsbase.c, an fsgsbase test
+ * Copyright (c) 2014-2016 Andy Lutomirski
+ * GPL v2
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/user.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <signal.h>
+#include <limits.h>
+#include <sys/ucontext.h>
+#include <sched.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <asm/ldt.h>
+#include <sys/mman.h>
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+static volatile sig_atomic_t want_segv;
+static volatile unsigned long segv_addr;
+
+static int nerrs;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+                      int flags)
+{
+       struct sigaction sa;
+       memset(&sa, 0, sizeof(sa));
+       sa.sa_sigaction = handler;
+       sa.sa_flags = SA_SIGINFO | flags;
+       sigemptyset(&sa.sa_mask);
+       if (sigaction(sig, &sa, 0))
+               err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+       struct sigaction sa;
+       memset(&sa, 0, sizeof(sa));
+       sa.sa_handler = SIG_DFL;
+       sigemptyset(&sa.sa_mask);
+       if (sigaction(sig, &sa, 0))
+               err(1, "sigaction");
+}
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+       ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+       if (!want_segv) {
+               clearhandler(SIGSEGV);
+               return;  /* Crash cleanly. */
+       }
+
+       want_segv = false;
+       segv_addr = (unsigned long)si->si_addr;
+
+       ctx->uc_mcontext.gregs[REG_RIP] += 4;   /* Skip the faulting mov */
+
+}
+
+enum which_base { FS, GS };
+
+static unsigned long read_base(enum which_base which)
+{
+       unsigned long offset;
+       /*
+        * Unless we have FSGSBASE, there's no direct way to do this from
+        * user mode.  We can get at it indirectly using signals, though.
+        */
+
+       want_segv = true;
+
+       offset = 0;
+       if (which == FS) {
+               /* Use a constant-length instruction here. */
+               asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+       } else {
+               asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+       }
+       if (!want_segv)
+               return segv_addr + offset;
+
+       /*
+        * If that didn't segfault, try the other end of the address space.
+        * Unless we get really unlucky and run into the vsyscall page, this
+        * is guaranteed to segfault.
+        */
+
+       offset = (ULONG_MAX >> 1) + 1;
+       if (which == FS) {
+               asm volatile ("mov %%fs:(%%rcx), %%rax"
+                             : : "c" (offset) : "rax");
+       } else {
+               asm volatile ("mov %%gs:(%%rcx), %%rax"
+                             : : "c" (offset) : "rax");
+       }
+       if (!want_segv)
+               return segv_addr + offset;
+
+       abort();
+}
+
+static void check_gs_value(unsigned long value)
+{
+       unsigned long base;
+       unsigned short sel;
+
+       printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value);
+       if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0)
+               err(1, "ARCH_SET_GS");
+
+       asm volatile ("mov %%gs, %0" : "=rm" (sel));
+       base = read_base(GS);
+       if (base == value) {
+               printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n",
+                      sel);
+       } else {
+               nerrs++;
+               printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n",
+                      base, sel);
+       }
+
+       if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0)
+               err(1, "ARCH_GET_GS");
+       if (base == value) {
+               printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n",
+                      sel);
+       } else {
+               nerrs++;
+               printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n",
+                      base, sel);
+       }
+}
+
+static void mov_0_gs(unsigned long initial_base, bool schedule)
+{
+       unsigned long base, arch_base;
+
+       printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule " : "");
+       if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0)
+               err(1, "ARCH_SET_GS");
+
+       if (schedule)
+               usleep(10);
+
+       asm volatile ("mov %0, %%gs" : : "rm" (0));
+       base = read_base(GS);
+       if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0)
+               err(1, "ARCH_GET_GS");
+       if (base == arch_base) {
+               printf("[OK]\tGSBASE is 0x%lx\n", base);
+       } else {
+               nerrs++;
+               printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base);
+       }
+}
+
+static volatile unsigned long remote_base;
+static volatile bool remote_hard_zero;
+static volatile unsigned int ftx;
+
+/*
+ * ARCH_SET_FS/GS(0) may or may not program a selector of zero.  HARD_ZERO
+ * means to force the selector to zero to improve test coverage.
+ */
+#define HARD_ZERO 0xa1fa5f343cb85fa4
+
+static void do_remote_base()
+{
+       unsigned long to_set = remote_base;
+       bool hard_zero = false;
+       if (to_set == HARD_ZERO) {
+               to_set = 0;
+               hard_zero = true;
+       }
+
+       if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0)
+               err(1, "ARCH_SET_GS");
+
+       if (hard_zero)
+               asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+       unsigned short sel;
+       asm volatile ("mov %%gs, %0" : "=rm" (sel));
+       printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n",
+              to_set, hard_zero ? " and clear gs" : "", sel);
+}
+
+void do_unexpected_base(void)
+{
+       /*
+        * The goal here is to try to arrange for GS == 0, GSBASE !=
+        * 0, and for the kernel to think that GSBASE == 0.
+        *
+        * To make the test as reliable as possible, this uses
+        * explicit descriptors.  (This is not the only way.  This
+        * could use ARCH_SET_GS with a low, nonzero base, but the
+        * relevant side effect of ARCH_SET_GS could change.)
+        */
+
+       /* Step 1: tell the kernel that we have GSBASE == 0. */
+       if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+               err(1, "ARCH_SET_GS");
+
+       /* Step 2: change GSBASE without telling the kernel. */
+       struct user_desc desc = {
+               .entry_number    = 0,
+               .base_addr       = 0xBAADF00D,
+               .limit           = 0xfffff,
+               .seg_32bit       = 1,
+               .contents        = 0, /* Data, grow-up */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 1,
+               .seg_not_present = 0,
+               .useable         = 0
+       };
+       if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
+               printf("\tother thread: using LDT slot 0\n");
+               asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
+       } else {
+               /* No modify_ldt for us (configured out, perhaps) */
+
+               struct user_desc *low_desc = mmap(
+                       NULL, sizeof(desc),
+                       PROT_READ | PROT_WRITE,
+                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+               memcpy(low_desc, &desc, sizeof(desc));
+
+               low_desc->entry_number = -1;
+
+               /* 32-bit set_thread_area */
+               long ret;
+               asm volatile ("int $0x80"
+                             : "=a" (ret) : "a" (243), "b" (low_desc)
+                             : "flags");
+               memcpy(&desc, low_desc, sizeof(desc));
+               munmap(low_desc, sizeof(desc));
+
+               if (ret != 0) {
+                       printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
+                       return;
+               }
+               printf("\tother thread: using GDT slot %d\n", desc.entry_number);
+               asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3)));
+       }
+
+       /*
+        * Step 3: set the selector back to zero.  On AMD chips, this will
+        * preserve GSBASE.
+        */
+
+       asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+}
+
+static void *threadproc(void *ctx)
+{
+       while (1) {
+               while (ftx == 0)
+                       syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
+               if (ftx == 3)
+                       return NULL;
+
+               if (ftx == 1)
+                       do_remote_base();
+               else if (ftx == 2)
+                       do_unexpected_base();
+               else
+                       errx(1, "helper thread got bad command");
+
+               ftx = 0;
+               syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+       }
+}
+
+static void set_gs_and_switch_to(unsigned long local, unsigned long remote)
+{
+       unsigned long base;
+
+       bool hard_zero = false;
+       if (local == HARD_ZERO) {
+               hard_zero = true;
+               local = 0;
+       }
+
+       printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n",
+              local, hard_zero ? " and clear gs" : "", remote);
+       if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0)
+               err(1, "ARCH_SET_GS");
+       if (hard_zero)
+               asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+       if (read_base(GS) != local) {
+               nerrs++;
+               printf("[FAIL]\tGSBASE wasn't set as expected\n");
+       }
+
+       remote_base = remote;
+       ftx = 1;
+       syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+       while (ftx != 0)
+               syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+       base = read_base(GS);
+       if (base == local) {
+               printf("[OK]\tGSBASE remained 0x%lx\n", local);
+       } else {
+               nerrs++;
+               printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
+       }
+}
+
+static void test_unexpected_base(void)
+{
+       unsigned long base;
+
+       printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n");
+       if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+               err(1, "ARCH_SET_GS");
+       asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+       ftx = 2;
+       syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+       while (ftx != 0)
+               syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+       base = read_base(GS);
+       if (base == 0) {
+               printf("[OK]\tGSBASE remained 0\n");
+       } else {
+               nerrs++;
+               printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
+       }
+}
+
+int main()
+{
+       pthread_t thread;
+
+       sethandler(SIGSEGV, sigsegv, 0);
+
+       check_gs_value(0);
+       check_gs_value(1);
+       check_gs_value(0x200000000);
+       check_gs_value(0);
+       check_gs_value(0x200000000);
+       check_gs_value(1);
+
+       for (int sched = 0; sched < 2; sched++) {
+               mov_0_gs(0, !!sched);
+               mov_0_gs(1, !!sched);
+               mov_0_gs(0x200000000, !!sched);
+       }
+
+       /* Set up for multithreading. */
+
+       cpu_set_t cpuset;
+       CPU_ZERO(&cpuset);
+       CPU_SET(0, &cpuset);
+       if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+               err(1, "sched_setaffinity to CPU 0");   /* should never fail */
+
+       if (pthread_create(&thread, 0, threadproc, 0) != 0)
+               err(1, "pthread_create");
+
+       static unsigned long bases_with_hard_zero[] = {
+               0, HARD_ZERO, 1, 0x200000000,
+       };
+
+       for (int local = 0; local < 4; local++) {
+               for (int remote = 0; remote < 4; remote++) {
+                       set_gs_and_switch_to(bases_with_hard_zero[local],
+                                            bases_with_hard_zero[remote]);
+               }
+       }
+
+       test_unexpected_base();
+
+       ftx = 3;  /* Kill the thread. */
+       syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+       if (pthread_join(thread, NULL) != 0)
+               err(1, "pthread_join");
+
+       return nerrs == 0 ? 0 : 1;
+}
index 31a3035cd4eb33485dc01a0247e367606fc7ae50..4af47079cf04305cec7e6a8d2aa0960a6fcfe352 100644 (file)
@@ -21,6 +21,9 @@
 #include <pthread.h>
 #include <sched.h>
 #include <linux/futex.h>
+#include <sys/mman.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
 
 #define AR_ACCESSED            (1<<8)
 
 
 static int nerrs;
 
+/* Points to an array of 1024 ints, each holding its own index. */
+static const unsigned int *counter_page;
+static struct user_desc *low_user_desc;
+static struct user_desc *low_user_desc_clear;  /* Use to delete GDT entry */
+static int gdt_entry_num;
+
 static void check_invalid_segment(uint16_t index, int ldt)
 {
        uint32_t has_limit = 0, has_ar = 0, limit, ar;
@@ -561,16 +570,257 @@ static void do_exec_test(void)
        }
 }
 
+static void setup_counter_page(void)
+{
+       unsigned int *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+                        MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+       if (page == MAP_FAILED)
+               err(1, "mmap");
+
+       for (int i = 0; i < 1024; i++)
+               page[i] = i;
+       counter_page = page;
+}
+
+static int invoke_set_thread_area(void)
+{
+       int ret;
+       asm volatile ("int $0x80"
+                     : "=a" (ret), "+m" (low_user_desc) :
+                       "a" (243), "b" (low_user_desc)
+                     : "flags");
+       return ret;
+}
+
+static void setup_low_user_desc(void)
+{
+       low_user_desc = mmap(NULL, 2 * sizeof(struct user_desc),
+                            PROT_READ | PROT_WRITE,
+                            MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+       if (low_user_desc == MAP_FAILED)
+               err(1, "mmap");
+
+       low_user_desc->entry_number     = -1;
+       low_user_desc->base_addr        = (unsigned long)&counter_page[1];
+       low_user_desc->limit            = 0xfffff;
+       low_user_desc->seg_32bit        = 1;
+       low_user_desc->contents         = 0; /* Data, grow-up*/
+       low_user_desc->read_exec_only   = 0;
+       low_user_desc->limit_in_pages   = 1;
+       low_user_desc->seg_not_present  = 0;
+       low_user_desc->useable          = 0;
+
+       if (invoke_set_thread_area() == 0) {
+               gdt_entry_num = low_user_desc->entry_number;
+               printf("[NOTE]\tset_thread_area is available; will use GDT index %d\n", gdt_entry_num);
+       } else {
+               printf("[NOTE]\tset_thread_area is unavailable\n");
+       }
+
+       low_user_desc_clear = low_user_desc + 1;
+       low_user_desc_clear->entry_number = gdt_entry_num;
+       low_user_desc_clear->read_exec_only = 1;
+       low_user_desc_clear->seg_not_present = 1;
+}
+
+static void test_gdt_invalidation(void)
+{
+       if (!gdt_entry_num)
+               return; /* 64-bit only system -- we can't use set_thread_area */
+
+       unsigned short prev_sel;
+       unsigned short sel;
+       unsigned int eax;
+       const char *result;
+#ifdef __x86_64__
+       unsigned long saved_base;
+       unsigned long new_base;
+#endif
+
+       /* Test DS */
+       invoke_set_thread_area();
+       eax = 243;
+       sel = (gdt_entry_num << 3) | 3;
+       asm volatile ("movw %%ds, %[prev_sel]\n\t"
+                     "movw %[sel], %%ds\n\t"
+#ifdef __i386__
+                     "pushl %%ebx\n\t"
+#endif
+                     "movl %[arg1], %%ebx\n\t"
+                     "int $0x80\n\t"   /* Should invalidate ds */
+#ifdef __i386__
+                     "popl %%ebx\n\t"
+#endif
+                     "movw %%ds, %[sel]\n\t"
+                     "movw %[prev_sel], %%ds"
+                     : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+                       "+a" (eax)
+                     : "m" (low_user_desc_clear),
+                       [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+                     : "flags");
+
+       if (sel != 0) {
+               result = "FAIL";
+               nerrs++;
+       } else {
+               result = "OK";
+       }
+       printf("[%s]\tInvalidate DS with set_thread_area: new DS = 0x%hx\n",
+              result, sel);
+
+       /* Test ES */
+       invoke_set_thread_area();
+       eax = 243;
+       sel = (gdt_entry_num << 3) | 3;
+       asm volatile ("movw %%es, %[prev_sel]\n\t"
+                     "movw %[sel], %%es\n\t"
+#ifdef __i386__
+                     "pushl %%ebx\n\t"
+#endif
+                     "movl %[arg1], %%ebx\n\t"
+                     "int $0x80\n\t"   /* Should invalidate es */
+#ifdef __i386__
+                     "popl %%ebx\n\t"
+#endif
+                     "movw %%es, %[sel]\n\t"
+                     "movw %[prev_sel], %%es"
+                     : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+                       "+a" (eax)
+                     : "m" (low_user_desc_clear),
+                       [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+                     : "flags");
+
+       if (sel != 0) {
+               result = "FAIL";
+               nerrs++;
+       } else {
+               result = "OK";
+       }
+       printf("[%s]\tInvalidate ES with set_thread_area: new ES = 0x%hx\n",
+              result, sel);
+
+       /* Test FS */
+       invoke_set_thread_area();
+       eax = 243;
+       sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+       syscall(SYS_arch_prctl, ARCH_GET_FS, &saved_base);
+#endif
+       asm volatile ("movw %%fs, %[prev_sel]\n\t"
+                     "movw %[sel], %%fs\n\t"
+#ifdef __i386__
+                     "pushl %%ebx\n\t"
+#endif
+                     "movl %[arg1], %%ebx\n\t"
+                     "int $0x80\n\t"   /* Should invalidate fs */
+#ifdef __i386__
+                     "popl %%ebx\n\t"
+#endif
+                     "movw %%fs, %[sel]\n\t"
+                     : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+                       "+a" (eax)
+                     : "m" (low_user_desc_clear),
+                       [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+                     : "flags");
+
+#ifdef __x86_64__
+       syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
+#endif
+
+       /* Restore FS/BASE for glibc */
+       asm volatile ("movw %[prev_sel], %%fs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+       if (saved_base)
+               syscall(SYS_arch_prctl, ARCH_SET_FS, saved_base);
+#endif
+
+       if (sel != 0) {
+               result = "FAIL";
+               nerrs++;
+       } else {
+               result = "OK";
+       }
+       printf("[%s]\tInvalidate FS with set_thread_area: new FS = 0x%hx\n",
+              result, sel);
+
+#ifdef __x86_64__
+       if (sel == 0 && new_base != 0) {
+               nerrs++;
+               printf("[FAIL]\tNew FSBASE was 0x%lx\n", new_base);
+       } else {
+               printf("[OK]\tNew FSBASE was zero\n");
+       }
+#endif
+
+       /* Test GS */
+       invoke_set_thread_area();
+       eax = 243;
+       sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+       syscall(SYS_arch_prctl, ARCH_GET_GS, &saved_base);
+#endif
+       asm volatile ("movw %%gs, %[prev_sel]\n\t"
+                     "movw %[sel], %%gs\n\t"
+#ifdef __i386__
+                     "pushl %%ebx\n\t"
+#endif
+                     "movl %[arg1], %%ebx\n\t"
+                     "int $0x80\n\t"   /* Should invalidate gs */
+#ifdef __i386__
+                     "popl %%ebx\n\t"
+#endif
+                     "movw %%gs, %[sel]\n\t"
+                     : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+                       "+a" (eax)
+                     : "m" (low_user_desc_clear),
+                       [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+                     : "flags");
+
+#ifdef __x86_64__
+       syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
+#endif
+
+       /* Restore GS/BASE for glibc */
+       asm volatile ("movw %[prev_sel], %%gs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+       if (saved_base)
+               syscall(SYS_arch_prctl, ARCH_SET_GS, saved_base);
+#endif
+
+       if (sel != 0) {
+               result = "FAIL";
+               nerrs++;
+       } else {
+               result = "OK";
+       }
+       printf("[%s]\tInvalidate GS with set_thread_area: new GS = 0x%hx\n",
+              result, sel);
+
+#ifdef __x86_64__
+       if (sel == 0 && new_base != 0) {
+               nerrs++;
+               printf("[FAIL]\tNew GSBASE was 0x%lx\n", new_base);
+       } else {
+               printf("[OK]\tNew GSBASE was zero\n");
+       }
+#endif
+}
+
 int main(int argc, char **argv)
 {
        if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec"))
                return finish_exec_test();
 
+       setup_counter_page();
+       setup_low_user_desc();
+
        do_simple_tests();
 
        do_multicpu_tests();
 
        do_exec_test();
 
+       test_gdt_invalidation();
+
        return nerrs ? 1 : 0;
 }