Merge tag 'v6.8-rc4' into x86/percpu, to resolve conflicts and refresh the branch
author Ingo Molnar <mingo@kernel.org>
Wed, 14 Feb 2024 09:45:07 +0000 (10:45 +0100)
committer Ingo Molnar <mingo@kernel.org>
Wed, 14 Feb 2024 09:45:07 +0000 (10:45 +0100)
Conflicts:
arch/x86/include/asm/percpu.h
arch/x86/include/asm/text-patching.h

Signed-off-by: Ingo Molnar <mingo@kernel.org>
20 files changed:
arch/x86/Kconfig
arch/x86/entry/calling.h
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/include/asm/current.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/percpu.h
arch/x86/include/asm/preempt.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/text-patching.h
arch/x86/kernel/alternative.c
arch/x86/kernel/callthunks.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/head_64.S
arch/x86/kernel/smp.c
arch/x86/kernel/traps.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/lib/cmpxchg8b_emu.S
arch/x86/xen/xen-asm.S
include/linux/compiler.h

diff --combined arch/x86/Kconfig
index 54e79d3061f96220255202c0214c584f4b3d25f1,5edec175b9bfc92dfac8832fc3600b843407828b..0a31b515d1205d0246e72fb796083e74c70049b6
@@@ -28,7 -28,6 +28,6 @@@ config X86_6
        select ARCH_HAS_GIGANTIC_PAGE
        select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
        select ARCH_SUPPORTS_PER_VMA_LOCK
-       select ARCH_USE_CMPXCHG_LOCKREF
        select HAVE_ARCH_SOFT_DIRTY
        select MODULES_USE_ELF_RELA
        select NEED_DMA_MAP_STATE
@@@ -60,6 -59,7 +59,7 @@@ config X8
        #
        select ACPI_LEGACY_TABLES_LOOKUP        if ACPI
        select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
+       select ACPI_HOTPLUG_CPU                 if ACPI_PROCESSOR && HOTPLUG_CPU
        select ARCH_32BIT_OFF_T                 if X86_32
        select ARCH_CLOCKSOURCE_INIT
        select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
@@@ -72,6 -72,7 +72,7 @@@
        select ARCH_HAS_CACHE_LINE_SIZE
        select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
        select ARCH_HAS_CPU_FINALIZE_INIT
+       select ARCH_HAS_CPU_PASID               if IOMMU_SVA
        select ARCH_HAS_CURRENT_STACK_POINTER
        select ARCH_HAS_DEBUG_VIRTUAL
        select ARCH_HAS_DEBUG_VM_PGTABLE        if !X86_PAE
@@@ -89,6 -90,7 +90,7 @@@
        select ARCH_HAS_PMEM_API                if X86_64
        select ARCH_HAS_PTE_DEVMAP              if X86_64
        select ARCH_HAS_PTE_SPECIAL
+       select ARCH_HAS_HW_PTE_YOUNG
        select ARCH_HAS_NONLEAF_PMD_YOUNG       if PGTABLE_LEVELS > 2
        select ARCH_HAS_UACCESS_FLUSHCACHE      if X86_64
        select ARCH_HAS_COPY_MC                 if X86_64
        select ARCH_SUPPORTS_LTO_CLANG
        select ARCH_SUPPORTS_LTO_CLANG_THIN
        select ARCH_USE_BUILTIN_BSWAP
+       select ARCH_USE_CMPXCHG_LOCKREF         if X86_CMPXCHG64
        select ARCH_USE_MEMTEST
        select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_USE_QUEUED_SPINLOCKS
        select GENERIC_CLOCKEVENTS_MIN_ADJUST
        select GENERIC_CMOS_UPDATE
        select GENERIC_CPU_AUTOPROBE
+       select GENERIC_CPU_DEVICES
        select GENERIC_CPU_VULNERABILITIES
        select GENERIC_EARLY_IOREMAP
        select GENERIC_ENTRY
        select HAS_IOPORT
        select HAVE_ACPI_APEI                   if ACPI
        select HAVE_ACPI_APEI_NMI               if ACPI
-       select HAVE_ALIGNED_STRUCT_PAGE         if SLUB
+       select HAVE_ALIGNED_STRUCT_PAGE
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_HUGE_VMAP              if X86_64 || X86_PAE
        select HAVE_ARCH_HUGE_VMALLOC           if X86_64
@@@ -384,10 -388,6 +388,6 @@@ config HAVE_INTEL_TX
        def_bool y
        depends on INTEL_IOMMU && ACPI
  
- config X86_32_SMP
-       def_bool y
-       depends on X86_32 && SMP
  config X86_64_SMP
        def_bool y
        depends on X86_64 && SMP
@@@ -1313,16 -1313,41 +1313,41 @@@ config MICROCOD
        def_bool y
        depends on CPU_SUP_AMD || CPU_SUP_INTEL
  
+ config MICROCODE_INITRD32
+       def_bool y
+       depends on MICROCODE && X86_32 && BLK_DEV_INITRD
  config MICROCODE_LATE_LOADING
        bool "Late microcode loading (DANGEROUS)"
        default n
-       depends on MICROCODE
+       depends on MICROCODE && SMP
        help
          Loading microcode late, when the system is up and executing instructions
          is a tricky business and should be avoided if possible. Just the sequence
          of synchronizing all cores and SMT threads is one fragile dance which does
          not guarantee that cores might not softlock after the loading. Therefore,
-         use this at your own risk. Late loading taints the kernel too.
+         use this at your own risk. Late loading taints the kernel unless the
+         microcode header indicates that it is safe for late loading via the
+         minimal revision check. This minimal revision check can be enforced on
+         the kernel command line with "microcode.minrev=Y".
+ config MICROCODE_LATE_FORCE_MINREV
+       bool "Enforce late microcode loading minimal revision check"
+       default n
+       depends on MICROCODE_LATE_LOADING
+       help
+         To prevent users from late loading microcode that modifies already
+         in-use features, newer microcode patches have a minimum revision field
+         in the microcode header, which tells the kernel which minimum
+         revision must be active in the CPU to safely load that new microcode
+         late into the running system. If disabled the check will not
+         be enforced but the kernel will be tainted when the minimal
+         revision check fails.
+         This minimal revision check can also be controlled via the
+         "microcode.minrev" parameter on the kernel command line.
+         If unsure say Y.
  
  config X86_MSR
        tristate "/dev/cpu/*/msr - Model-specific register support"
@@@ -1390,7 -1415,7 +1415,7 @@@ config HIGHMEM4
  
  config HIGHMEM64G
        bool "64GB"
-       depends on !M486SX && !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !MWINCHIP3D && !MK6
+       depends on X86_HAVE_PAE
        select X86_PAE
        help
          Select this if you have a 32-bit processor and more than 4
@@@ -1447,7 -1472,7 +1472,7 @@@ config HIGHME
  
  config X86_PAE
        bool "PAE (Physical Address Extension) Support"
-       depends on X86_32 && !HIGHMEM4G
+       depends on X86_32 && X86_HAVE_PAE
        select PHYS_ADDR_T_64BIT
        select SWIOTLB
        help
@@@ -1534,6 -1559,7 +1559,7 @@@ config NUM
        depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP)
        default y if X86_BIGSMP
        select USE_PERCPU_NUMA_NODE_ID
+       select OF_NUMA if OF
        help
          Enable NUMA (Non-Uniform Memory Access) support.
  
@@@ -1939,12 -1965,30 +1965,30 @@@ config X86_USER_SHADOW_STAC
  
          If unsure, say N.
  
+ config INTEL_TDX_HOST
+       bool "Intel Trust Domain Extensions (TDX) host support"
+       depends on CPU_SUP_INTEL
+       depends on X86_64
+       depends on KVM_INTEL
+       depends on X86_X2APIC
+       select ARCH_KEEP_MEMBLOCK
+       depends on CONTIG_ALLOC
+       depends on !KEXEC_CORE
+       depends on X86_MCE
+       help
+         Intel Trust Domain Extensions (TDX) protects guest VMs from malicious
+         host and certain physical attacks.  This option enables necessary TDX
+         support in the host kernel to run confidential VMs.
+         If unsure, say N.
  config EFI
        bool "EFI runtime service support"
        depends on ACPI
        select UCS2_STRING
        select EFI_RUNTIME_WRAPPERS
        select ARCH_USE_MEMREMAP_PROT
+       select EFI_RUNTIME_MAP if KEXEC_CORE
        help
          This enables the kernel to use EFI runtime services that are
          available (such as the EFI variable services).
@@@ -2020,7 -2064,6 +2064,6 @@@ config EFI_MAX_FAKE_ME
  config EFI_RUNTIME_MAP
        bool "Export EFI runtime maps to sysfs" if EXPERT
        depends on EFI
-       default KEXEC_CORE
        help
          Export EFI runtime memory regions to /sys/firmware/efi/runtime-map.
          That memory map is required by the 2nd kernel to set up EFI virtual
@@@ -2034,7 -2077,7 +2077,7 @@@ config ARCH_SUPPORTS_KEXE
        def_bool y
  
  config ARCH_SUPPORTS_KEXEC_FILE
-       def_bool X86_64 && CRYPTO && CRYPTO_SHA256
+       def_bool X86_64
  
  config ARCH_SELECTS_KEXEC_FILE
        def_bool y
        select HAVE_IMA_KEXEC if IMA
  
  config ARCH_SUPPORTS_KEXEC_PURGATORY
-       def_bool KEXEC_FILE
+       def_bool y
  
  config ARCH_SUPPORTS_KEXEC_SIG
        def_bool y
@@@ -2062,6 -2105,9 +2105,9 @@@ config ARCH_SUPPORTS_CRASH_DUM
  config ARCH_SUPPORTS_CRASH_HOTPLUG
        def_bool y
  
+ config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+       def_bool CRASH_CORE
  config PHYSICAL_START
        hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP)
        default "0x1000000"
@@@ -2388,18 -2434,6 +2434,18 @@@ source "kernel/livepatch/Kconfig
  
  endmenu
  
 +config CC_HAS_NAMED_AS
 +      def_bool CC_IS_GCC && GCC_VERSION >= 120100
 +
 +config USE_X86_SEG_SUPPORT
 +      def_bool y
 +      depends on CC_HAS_NAMED_AS
 +      #
 +      # -fsanitize=kernel-address (KASAN) is at the moment incompatible
 +      # with named address spaces - see GCC PR sanitizer/111736.
 +      #
 +      depends on !KASAN
 +
  config CC_HAS_SLS
        def_bool $(cc-option,-mharden-sls=all)
  
@@@ -2966,6 -3000,15 +3012,15 @@@ config IA32_EMULATIO
          64-bit kernel. You should likely turn this on, unless you're
          100% sure that you don't have any 32-bit programs left.
  
+ config IA32_EMULATION_DEFAULT_DISABLED
+       bool "IA32 emulation disabled by default"
+       default n
+       depends on IA32_EMULATION
+       help
+         Make IA32 emulation disabled by default. This prevents loading 32-bit
+         processes and access to 32-bit syscalls. If unsure, leave it to its
+         default value.
  config X86_X32_ABI
        bool "x32 ABI for 64-bit mode"
        depends on X86_64
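
The CC_HAS_NAMED_AS / USE_X86_SEG_SUPPORT options added in the hunk above gate per-CPU accesses through GCC's x86 named address spaces. As a rough standalone sketch (not code from this merge; the function name and parameter are made up), dereferencing a __seg_gs-qualified pointer emits a %gs-prefixed load that the optimizer can treat like any other memory operand, instead of an opaque inline-asm accessor:

#include <stdint.h>

#ifdef __SEG_GS		/* predefined by GCC when __seg_gs is available */
static inline unsigned long gs_load(uintptr_t offset)
{
	/* Compiles to a single %gs-relative MOV; GCC may CSE it, fold the
	 * offset into an addressing mode, or hoist it out of a loop. */
	return *(unsigned long __seg_gs *)offset;
}
#endif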
diff --combined arch/x86/entry/calling.h
index 47368ab0bda0edd7635e0540525a43d34ba3a1ed,9f1d94790a54912cc431e9e39fc0a4a7e6069398..e59d3073e7cf2e4daaccf986108c5c8ea21c1504
@@@ -173,10 -173,9 +173,9 @@@ For 32-bit we have the following conven
  .endm
  
  #define THIS_CPU_user_pcid_flush_mask   \
 -      PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
 +      PER_CPU_VAR(cpu_tlbstate + TLB_STATE_user_pcid_flush_mask)
  
- .macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
-       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+ .macro SWITCH_TO_USER_CR3 scratch_reg:req scratch_reg2:req
        mov     %cr3, \scratch_reg
  
        ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
        /* Flip the PGD to the user version */
        orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
        mov     \scratch_reg, %cr3
+ .endm
+ .macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+       SWITCH_TO_USER_CR3 \scratch_reg \scratch_reg2
  .Lend_\@:
  .endm
  
  .macro SWITCH_TO_USER_CR3_STACK       scratch_reg:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
        pushq   %rax
-       SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+       SWITCH_TO_USER_CR3 scratch_reg=\scratch_reg scratch_reg2=%rax
        popq    %rax
+ .Lend_\@:
  .endm
  
  .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
diff --combined arch/x86/entry/entry_32.S
index d4e094b2c877f3f1c32d7436a1af9ea293689ee3,c73047bf9f4bff9c4631c0eab383cedceda41918..4e295798638b8176de7bd5f341f063124c60ce49
  .macro CHECK_AND_APPLY_ESPFIX
  #ifdef CONFIG_X86_ESPFIX32
  #define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8)
 -#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET
 +#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page + GDT_ESPFIX_OFFSET)
  
        ALTERNATIVE     "jmp .Lend_\@", "", X86_BUG_ESPFIX
  
@@@ -837,7 -837,7 +837,7 @@@ SYM_FUNC_START(entry_SYSENTER_32
  
        movl    %esp, %eax
        call    do_SYSENTER_32
-       testl   %eax, %eax
+       testb   %al, %al
        jz      .Lsyscall_32_done
  
        STACKLEAK_ERASE
diff --combined arch/x86/entry/entry_64.S
index 6d236652fceb6df8f09f3291378dea7311b3da12,c40f89ab1b4c70a18b632a50c1e659e3fd83cfa9..567d973eed0381810ed276a5b8c8561573a13374
@@@ -18,6 -18,7 +18,7 @@@
   * - SYM_FUNC_START/END:Define functions in the symbol table.
   * - idtentry:                Define exception entry points.
   */
+ #include <linux/export.h>
  #include <linux/linkage.h>
  #include <asm/segment.h>
  #include <asm/cache.h>
@@@ -34,7 -35,6 +35,6 @@@
  #include <asm/asm.h>
  #include <asm/smap.h>
  #include <asm/pgtable_types.h>
- #include <asm/export.h>
  #include <asm/frame.h>
  #include <asm/trapnr.h>
  #include <asm/nospec-branch.h>
@@@ -126,70 -126,8 +126,8 @@@ SYM_INNER_LABEL(entry_SYSCALL_64_after_
         * In the Xen PV case we must use iret anyway.
         */
  
-       ALTERNATIVE "", "jmp    swapgs_restore_regs_and_return_to_usermode", \
-               X86_FEATURE_XENPV
-       movq    RCX(%rsp), %rcx
-       movq    RIP(%rsp), %r11
-       cmpq    %rcx, %r11      /* SYSRET requires RCX == RIP */
-       jne     swapgs_restore_regs_and_return_to_usermode
-       /*
-        * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
-        * in kernel space.  This essentially lets the user take over
-        * the kernel, since userspace controls RSP.
-        *
-        * If width of "canonical tail" ever becomes variable, this will need
-        * to be updated to remain correct on both old and new CPUs.
-        *
-        * Change top bits to match most significant bit (47th or 56th bit
-        * depending on paging mode) in the address.
-        */
- #ifdef CONFIG_X86_5LEVEL
-       ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
-               "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
- #else
-       shl     $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-       sar     $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
- #endif
-       /* If this changed %rcx, it was not canonical */
-       cmpq    %rcx, %r11
-       jne     swapgs_restore_regs_and_return_to_usermode
-       cmpq    $__USER_CS, CS(%rsp)            /* CS must match SYSRET */
-       jne     swapgs_restore_regs_and_return_to_usermode
-       movq    R11(%rsp), %r11
-       cmpq    %r11, EFLAGS(%rsp)              /* R11 == RFLAGS */
-       jne     swapgs_restore_regs_and_return_to_usermode
-       /*
-        * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
-        * restore RF properly. If the slowpath sets it for whatever reason, we
-        * need to restore it correctly.
-        *
-        * SYSRET can restore TF, but unlike IRET, restoring TF results in a
-        * trap from userspace immediately after SYSRET.  This would cause an
-        * infinite loop whenever #DB happens with register state that satisfies
-        * the opportunistic SYSRET conditions.  For example, single-stepping
-        * this user code:
-        *
-        *           movq       $stuck_here, %rcx
-        *           pushfq
-        *           popq %r11
-        *   stuck_here:
-        *
-        * would never get past 'stuck_here'.
-        */
-       testq   $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
-       jnz     swapgs_restore_regs_and_return_to_usermode
-       /* nothing to check for RSP */
-       cmpq    $__USER_DS, SS(%rsp)            /* SS must match SYSRET */
-       jne     swapgs_restore_regs_and_return_to_usermode
+       ALTERNATIVE "testb %al, %al; jz swapgs_restore_regs_and_return_to_usermode", \
+               "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
  
        /*
         * We win! This label is here just for ease of understanding
@@@ -252,7 -190,7 +190,7 @@@ SYM_FUNC_START(__switch_to_asm
  
  #ifdef CONFIG_STACKPROTECTOR
        movq    TASK_stack_canary(%rsi), %rbx
 -      movq    %rbx, PER_CPU_VAR(fixed_percpu_data) + FIXED_stack_canary
 +      movq    %rbx, PER_CPU_VAR(fixed_percpu_data + FIXED_stack_canary)
  #endif
  
        /*
@@@ -621,17 -559,27 +559,27 @@@ __irqentry_text_end
  SYM_CODE_START_LOCAL(common_interrupt_return)
  SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
        IBRS_EXIT
- #ifdef CONFIG_DEBUG_ENTRY
-       /* Assert that pt_regs indicates user mode. */
-       testb   $3, CS(%rsp)
-       jnz     1f
-       ud2
- 1:
- #endif
  #ifdef CONFIG_XEN_PV
        ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
  #endif
+ #ifdef CONFIG_PAGE_TABLE_ISOLATION
+       ALTERNATIVE "", "jmp .Lpti_restore_regs_and_return_to_usermode", X86_FEATURE_PTI
+ #endif
+       STACKLEAK_ERASE
+       POP_REGS
+       add     $8, %rsp        /* orig_ax */
+       UNWIND_HINT_IRET_REGS
  
+ .Lswapgs_and_iret:
+       swapgs
+       /* Assert that the IRET frame indicates user mode. */
+       testb   $3, 8(%rsp)
+       jnz     .Lnative_iret
+       ud2
+ #ifdef CONFIG_PAGE_TABLE_ISOLATION
+ .Lpti_restore_regs_and_return_to_usermode:
        POP_REGS pop_rdi=0
  
        /*
         */
        STACKLEAK_ERASE_NOCLOBBER
  
-       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+       push    %rax
+       SWITCH_TO_USER_CR3 scratch_reg=%rdi scratch_reg2=%rax
+       pop     %rax
  
        /* Restore RDI. */
        popq    %rdi
-       swapgs
-       jmp     .Lnative_iret
+       jmp     .Lswapgs_and_iret
+ #endif
  
  SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
  #ifdef CONFIG_DEBUG_ENTRY
@@@ -1509,18 -1458,16 +1458,16 @@@ nmi_restore
        iretq
  SYM_CODE_END(asm_exc_nmi)
  
- #ifndef CONFIG_IA32_EMULATION
  /*
   * This handles SYSCALL from 32-bit code.  There is no way to program
   * MSRs to fully disable 32-bit SYSCALL.
   */
- SYM_CODE_START(ignore_sysret)
+ SYM_CODE_START(entry_SYSCALL32_ignore)
        UNWIND_HINT_END_OF_STACK
        ENDBR
        mov     $-ENOSYS, %eax
        sysretl
- SYM_CODE_END(ignore_sysret)
- #endif
+ SYM_CODE_END(entry_SYSCALL32_ignore)
  
  .pushsection .text, "ax"
        __FUNC_ALIGN
diff --combined arch/x86/include/asm/current.h
index c8c5674d69f644432ab5eec30a7d1115789e76ac,dd4b67101bb7ed40013584f66cc80e805ea0ece1..fb7702d4170c554dd9f34737d76ac69e7b07fee0
@@@ -2,6 -2,7 +2,7 @@@
  #ifndef _ASM_X86_CURRENT_H
  #define _ASM_X86_CURRENT_H
  
+ #include <linux/build_bug.h>
  #include <linux/compiler.h>
  
  #ifndef __ASSEMBLY__
@@@ -36,15 -37,8 +37,15 @@@ static_assert(sizeof(struct pcpu_hot) =
  
  DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);
  
 +/* const-qualified alias to pcpu_hot, aliased by linker. */
 +DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
 +                      const_pcpu_hot);
 +
  static __always_inline struct task_struct *get_current(void)
  {
 +      if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
 +              return this_cpu_read_const(const_pcpu_hot.current_task);
 +
        return this_cpu_read_stable(pcpu_hot.current_task);
  }
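
The const_pcpu_hot alias read by get_current() above exists so that, with USE_X86_SEG_SUPPORT, fetching "current" becomes an ordinary const-qualified load. A hypothetical illustration, assuming kernel context (PF_KTHREAD and pr_info() come from the wider tree, not from this merge):

static void report_current_example(void)
{
	/* Because const_pcpu_hot is const-qualified and read through a plain
	 * named-address-space load, the compiler may keep "current" in a
	 * register across both uses below instead of reloading it. */
	if (get_current()->flags & PF_KTHREAD)
		pr_info("kthread: %s\n", get_current()->comm);
}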
  
diff --combined arch/x86/include/asm/nospec-branch.h
index 65fbf6b853afaece27fdbb3f5286b87aae09fb7b,262e65539f83c86d140552305c8a9d330b313c20..691ff1ef701b623b8d671ea89b9b258cd25562be
@@@ -49,7 -49,7 +49,7 @@@
   * but there is still a cushion vs. the RSB depth. The algorithm does not
   * claim to be perfect and it can be speculated around by the CPU, but it
   * is considered that it obfuscates the problem enough to make exploitation
-  * extremly difficult.
+  * extremely difficult.
   */
  #define RET_DEPTH_SHIFT                       5
  #define RSB_RET_STUFF_LOOPS           16
  
  #ifdef CONFIG_CALL_THUNKS_DEBUG
  # define CALL_THUNKS_DEBUG_INC_CALLS                          \
 -      incq    %gs:__x86_call_count;
 +      incq    PER_CPU_VAR(__x86_call_count);
  # define CALL_THUNKS_DEBUG_INC_RETS                           \
 -      incq    %gs:__x86_ret_count;
 +      incq    PER_CPU_VAR(__x86_ret_count);
  # define CALL_THUNKS_DEBUG_INC_STUFFS                         \
 -      incq    %gs:__x86_stuffs_count;
 +      incq    PER_CPU_VAR(__x86_stuffs_count);
  # define CALL_THUNKS_DEBUG_INC_CTXSW                          \
 -      incq    %gs:__x86_ctxsw_count;
 +      incq    PER_CPU_VAR(__x86_ctxsw_count);
  #else
  # define CALL_THUNKS_DEBUG_INC_CALLS
  # define CALL_THUNKS_DEBUG_INC_RETS
@@@ -80,6 -80,9 +80,6 @@@
  #define CREDIT_CALL_DEPTH                                     \
        movq    $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
  
 -#define ASM_CREDIT_CALL_DEPTH                                 \
 -      movq    $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
 -
  #define RESET_CALL_DEPTH                                      \
        xor     %eax, %eax;                                     \
        bts     $63, %rax;                                      \
        CALL_THUNKS_DEBUG_INC_CALLS
  
  #define INCREMENT_CALL_DEPTH                                  \
 -      sarq    $5, %gs:pcpu_hot + X86_call_depth;              \
 -      CALL_THUNKS_DEBUG_INC_CALLS
 -
 -#define ASM_INCREMENT_CALL_DEPTH                              \
        sarq    $5, PER_CPU_VAR(pcpu_hot + X86_call_depth);     \
        CALL_THUNKS_DEBUG_INC_CALLS
  
  #else
  #define CREDIT_CALL_DEPTH
 -#define ASM_CREDIT_CALL_DEPTH
  #define RESET_CALL_DEPTH
 -#define INCREMENT_CALL_DEPTH
 -#define ASM_INCREMENT_CALL_DEPTH
  #define RESET_CALL_DEPTH_FROM_CALL
 +#define INCREMENT_CALL_DEPTH
  #endif
  
  /*
        jnz     771b;                                   \
        /* barrier for jnz misprediction */             \
        lfence;                                         \
 -      ASM_CREDIT_CALL_DEPTH                           \
 +      CREDIT_CALL_DEPTH                               \
        CALL_THUNKS_DEBUG_INC_CTXSW
  #else
  /*
  .macro ANNOTATE_RETPOLINE_SAFE
  .Lhere_\@:
        .pushsection .discard.retpoline_safe
-       .long .Lhere_\@ - .
+       .long .Lhere_\@
        .popsection
  .endm
  
  
  /*
   * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
-  * eventually turn into it's own annotation.
+  * eventually turn into its own annotation.
   */
  .macro VALIDATE_UNRET_END
  #if defined(CONFIG_NOINSTR_VALIDATION) && \
  .Lskip_rsb_\@:
  .endm
  
- #ifdef CONFIG_CPU_UNRET_ENTRY
+ #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)
  #define CALL_UNTRAIN_RET      "call entry_untrain_ret"
  #else
  #define CALL_UNTRAIN_RET      ""
   * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
   * where we have a stack but before any RET instruction.
   */
- .macro UNTRAIN_RET
- #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
-       defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
+ .macro __UNTRAIN_RET ibpb_feature, call_depth_insns
+ #if defined(CONFIG_RETHUNK) || defined(CONFIG_CPU_IBPB_ENTRY)
        VALIDATE_UNRET_END
        ALTERNATIVE_3 "",                                               \
                      CALL_UNTRAIN_RET, X86_FEATURE_UNRET,              \
-                     "call entry_ibpb", X86_FEATURE_ENTRY_IBPB,        \
-                     __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+                     "call entry_ibpb", \ibpb_feature,                 \
+                    __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
  #endif
  .endm
  
- .macro UNTRAIN_RET_VM
- #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
-       defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
-       VALIDATE_UNRET_END
-       ALTERNATIVE_3 "",                                               \
-                     CALL_UNTRAIN_RET, X86_FEATURE_UNRET,              \
-                     "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT,    \
-                     __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
- #endif
- .endm
+ #define UNTRAIN_RET \
+       __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH)
  
- .macro UNTRAIN_RET_FROM_CALL
- #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
-       defined(CONFIG_CALL_DEPTH_TRACKING)
-       VALIDATE_UNRET_END
-       ALTERNATIVE_3 "",                                               \
-                     CALL_UNTRAIN_RET, X86_FEATURE_UNRET,              \
-                     "call entry_ibpb", X86_FEATURE_ENTRY_IBPB,        \
-                     __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
- #endif
- .endm
+ #define UNTRAIN_RET_VM \
+       __UNTRAIN_RET X86_FEATURE_IBPB_ON_VMEXIT, __stringify(RESET_CALL_DEPTH)
+ #define UNTRAIN_RET_FROM_CALL \
+       __UNTRAIN_RET X86_FEATURE_ENTRY_IBPB, __stringify(RESET_CALL_DEPTH_FROM_CALL)
  
  
  .macro CALL_DEPTH_ACCOUNT
  #ifdef CONFIG_CALL_DEPTH_TRACKING
        ALTERNATIVE "",                                                 \
 -                  __stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
 +                  __stringify(INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
  #endif
  .endm
  
  #define ANNOTATE_RETPOLINE_SAFE                                       \
        "999:\n\t"                                              \
        ".pushsection .discard.retpoline_safe\n\t"              \
-       ".long 999b - .\n\t"                                    \
+       ".long 999b\n\t"                                        \
        ".popsection\n\t"
  
  typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
@@@ -339,13 -334,23 +325,23 @@@ extern void __x86_return_thunk(void)
  static inline void __x86_return_thunk(void) {}
  #endif
  
+ #ifdef CONFIG_CPU_UNRET_ENTRY
  extern void retbleed_return_thunk(void);
+ #else
+ static inline void retbleed_return_thunk(void) {}
+ #endif
+ #ifdef CONFIG_CPU_SRSO
  extern void srso_return_thunk(void);
  extern void srso_alias_return_thunk(void);
+ #else
+ static inline void srso_return_thunk(void) {}
+ static inline void srso_alias_return_thunk(void) {}
+ #endif
  
- extern void retbleed_untrain_ret(void);
- extern void srso_untrain_ret(void);
- extern void srso_alias_untrain_ret(void);
+ extern void retbleed_return_thunk(void);
+ extern void srso_return_thunk(void);
+ extern void srso_alias_return_thunk(void);
  
  extern void entry_untrain_ret(void);
  extern void entry_ibpb(void);
  extern void (*x86_return_thunk)(void);
  
  #ifdef CONFIG_CALL_DEPTH_TRACKING
- extern void __x86_return_skl(void);
- static inline void x86_set_skl_return_thunk(void)
- {
-       x86_return_thunk = &__x86_return_skl;
- }
+ extern void call_depth_return_thunk(void);
  
  #define CALL_DEPTH_ACCOUNT                                    \
        ALTERNATIVE("",                                         \
@@@ -371,12 -371,12 +362,12 @@@ DECLARE_PER_CPU(u64, __x86_ret_count)
  DECLARE_PER_CPU(u64, __x86_stuffs_count);
  DECLARE_PER_CPU(u64, __x86_ctxsw_count);
  #endif
- #else
- static inline void x86_set_skl_return_thunk(void) {}
+ #else /* !CONFIG_CALL_DEPTH_TRACKING */
  
+ static inline void call_depth_return_thunk(void) {}
  #define CALL_DEPTH_ACCOUNT ""
  
- #endif
+ #endif /* CONFIG_CALL_DEPTH_TRACKING */
  
  #ifdef CONFIG_RETPOLINE
  
diff --combined arch/x86/include/asm/percpu.h
index e56a37886143a86a9065cd49ee3415b13e67eb06,5e01883eb51ee8e576e70db0577bfbe0c20c2e4f..44958ebaf626e20c970acaacaad012f93cba2671
@@@ -4,21 -4,17 +4,21 @@@
  
  #ifdef CONFIG_X86_64
  #define __percpu_seg          gs
 +#define __percpu_rel          (%rip)
  #else
  #define __percpu_seg          fs
 +#define __percpu_rel
  #endif
  
  #ifdef __ASSEMBLY__
  
  #ifdef CONFIG_SMP
 -#define PER_CPU_VAR(var)      %__percpu_seg:var
 -#else /* ! SMP */
 -#define PER_CPU_VAR(var)      var
 -#endif        /* SMP */
 +#define __percpu              %__percpu_seg:
 +#else
 +#define __percpu
 +#endif
 +
 +#define PER_CPU_VAR(var)      __percpu(var)__percpu_rel
  
  #ifdef CONFIG_X86_64_SMP
  #define INIT_PER_CPU_VAR(var)  init_per_cpu__##var
  
  #else /* ...!ASSEMBLY */
  
- #include <linux/kernel.h>
 +#include <linux/build_bug.h>
  #include <linux/stringify.h>
+ #include <asm/asm.h>
  
  #ifdef CONFIG_SMP
 +
 +#ifdef CONFIG_CC_HAS_NAMED_AS
 +
 +#ifdef __CHECKER__
 +#define __seg_gs              __attribute__((address_space(__seg_gs)))
 +#define __seg_fs              __attribute__((address_space(__seg_fs)))
 +#endif
 +
 +#ifdef CONFIG_X86_64
 +#define __percpu_seg_override __seg_gs
 +#else
 +#define __percpu_seg_override __seg_fs
 +#endif
 +
 +#define __percpu_prefix               ""
 +
 +#else /* CONFIG_CC_HAS_NAMED_AS */
 +
 +#define __percpu_seg_override
  #define __percpu_prefix               "%%"__stringify(__percpu_seg)":"
 +
 +#endif /* CONFIG_CC_HAS_NAMED_AS */
 +
 +#define __force_percpu_prefix "%%"__stringify(__percpu_seg)":"
  #define __my_cpu_offset               this_cpu_read(this_cpu_off)
  
 +#ifdef CONFIG_USE_X86_SEG_SUPPORT
 +/*
 + * Efficient implementation for cases in which the compiler supports
 + * named address spaces.  Allows the compiler to perform additional
 + * optimizations that can save more instructions.
 + */
 +#define arch_raw_cpu_ptr(ptr)                                 \
 +({                                                            \
 +      unsigned long tcp_ptr__;                                \
 +      tcp_ptr__ = __raw_cpu_read(, this_cpu_off);             \
 +                                                              \
 +      tcp_ptr__ += (unsigned long)(ptr);                      \
 +      (typeof(*(ptr)) __kernel __force *)tcp_ptr__;           \
 +})
 +#else /* CONFIG_USE_X86_SEG_SUPPORT */
  /*
   * Compared to the generic __my_cpu_offset version, the following
   * saves one instruction and avoids clobbering a temp register.
   */
 -#define arch_raw_cpu_ptr(ptr)                         \
 -({                                                    \
 -      unsigned long tcp_ptr__;                        \
 -      asm ("add " __percpu_arg(1) ", %0"              \
 -           : "=r" (tcp_ptr__)                         \
 -           : "m" (this_cpu_off), "0" (ptr));          \
 -      (typeof(*(ptr)) __kernel __force *)tcp_ptr__;   \
 +#define arch_raw_cpu_ptr(ptr)                                 \
 +({                                                            \
 +      unsigned long tcp_ptr__;                                \
 +      asm ("mov " __percpu_arg(1) ", %0"                      \
 +           : "=r" (tcp_ptr__)                                 \
 +           : "m" (__my_cpu_var(this_cpu_off)));               \
 +                                                              \
 +      tcp_ptr__ += (unsigned long)(ptr);                      \
 +      (typeof(*(ptr)) __kernel __force *)tcp_ptr__;           \
  })
 -#else
 +#endif /* CONFIG_USE_X86_SEG_SUPPORT */
 +
 +#define PER_CPU_VAR(var)      %__percpu_seg:(var)__percpu_rel
 +
 +#else /* CONFIG_SMP */
 +#define __percpu_seg_override
  #define __percpu_prefix               ""
 -#endif
 +#define __force_percpu_prefix ""
 +
 +#define PER_CPU_VAR(var)      (var)__percpu_rel
  
 +#endif /* CONFIG_SMP */
 +
 +#define __my_cpu_type(var)    typeof(var) __percpu_seg_override
 +#define __my_cpu_ptr(ptr)     (__my_cpu_type(*ptr) *)(uintptr_t)(ptr)
 +#define __my_cpu_var(var)     (*__my_cpu_ptr(&var))
  #define __percpu_arg(x)               __percpu_prefix "%" #x
 +#define __force_percpu_arg(x) __force_percpu_prefix "%" #x
  
  /*
   * Initialized pointers to per-cpu variables needed for the boot
@@@ -165,14 -107,14 +165,14 @@@ do {                                                                    
                (void)pto_tmp__;                                        \
        }                                                               \
        asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var]))   \
 -          : [var] "+m" (_var)                                         \
 +          : [var] "+m" (__my_cpu_var(_var))                           \
            : [val] __pcpu_reg_imm_##size(pto_val__));                  \
  } while (0)
  
  #define percpu_unary_op(size, qual, op, _var)                         \
  ({                                                                    \
        asm qual (__pcpu_op1_##size(op, __percpu_arg([var]))            \
 -          : [var] "+m" (_var));                                       \
 +          : [var] "+m" (__my_cpu_var(_var)));                         \
  })
  
  /*
@@@ -202,16 -144,16 +202,16 @@@ do {                                                                    
        __pcpu_type_##size pfo_val__;                                   \
        asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]")  \
            : [val] __pcpu_reg_##size("=", pfo_val__)                   \
 -          : [var] "m" (_var));                                        \
 +          : [var] "m" (__my_cpu_var(_var)));                          \
        (typeof(_var))(unsigned long) pfo_val__;                        \
  })
  
  #define percpu_stable_op(size, op, _var)                              \
  ({                                                                    \
        __pcpu_type_##size pfo_val__;                                   \
 -      asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]")       \
 +      asm(__pcpu_op2_##size(op, __force_percpu_arg(a[var]), "%[val]") \
            : [val] __pcpu_reg_##size("=", pfo_val__)                   \
 -          : [var] "p" (&(_var)));                                     \
 +          : [var] "i" (&(_var)));                                     \
        (typeof(_var))(unsigned long) pfo_val__;                        \
  })
  
        asm qual (__pcpu_op2_##size("xadd", "%[tmp]",                   \
                                     __percpu_arg([var]))               \
                  : [tmp] __pcpu_reg_##size("+", paro_tmp__),           \
 -                  [var] "+m" (_var)                                   \
 +                  [var] "+m" (__my_cpu_var(_var))                     \
                  : : "memory");                                        \
        (typeof(_var))(unsigned long) (paro_tmp__ + _val);              \
  })
                                    __percpu_arg([var]))                \
                  "\n\tjnz 1b"                                          \
                  : [oval] "=&a" (pxo_old__),                           \
 -                  [var] "+m" (_var)                                   \
 +                  [var] "+m" (__my_cpu_var(_var))                     \
                  : [nval] __pcpu_reg_##size(, pxo_new__)               \
                  : "memory");                                          \
        (typeof(_var))(unsigned long) pxo_old__;                        \
        asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]",               \
                                    __percpu_arg([var]))                \
                  : [oval] "+a" (pco_old__),                            \
 -                  [var] "+m" (_var)                                   \
 +                  [var] "+m" (__my_cpu_var(_var))                     \
                  : [nval] __pcpu_reg_##size(, pco_new__)               \
                  : "memory");                                          \
        (typeof(_var))(unsigned long) pco_old__;                        \
                  CC_SET(z)                                             \
                  : CC_OUT(z) (success),                                \
                    [oval] "+a" (pco_old__),                            \
 -                  [var] "+m" (_var)                                   \
 +                  [var] "+m" (__my_cpu_var(_var))                     \
                  : [nval] __pcpu_reg_##size(, pco_new__)               \
                  : "memory");                                          \
        if (unlikely(!success))                                         \
                                                                        \
        asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu",            \
                              "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
 -                : [var] "+m" (_var),                                  \
 +                : [var] "+m" (__my_cpu_var(_var)),                    \
                    "+a" (old__.low),                                   \
                    "+d" (old__.high)                                   \
                  : "b" (new__.low),                                    \
                              "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
                  CC_SET(z)                                             \
                  : CC_OUT(z) (success),                                \
 -                  [var] "+m" (_var),                                  \
 +                  [var] "+m" (__my_cpu_var(_var)),                    \
                    "+a" (old__.low),                                   \
                    "+d" (old__.high)                                   \
                  : "b" (new__.low),                                    \
                                                                        \
        asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu",           \
                              "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
 -                : [var] "+m" (_var),                                  \
 +                : [var] "+m" (__my_cpu_var(_var)),                    \
                    "+a" (old__.low),                                   \
                    "+d" (old__.high)                                   \
                  : "b" (new__.low),                                    \
                              "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
                  CC_SET(z)                                             \
                  : CC_OUT(z) (success),                                \
 -                  [var] "+m" (_var),                                  \
 +                  [var] "+m" (__my_cpu_var(_var)),                    \
                    "+a" (old__.low),                                   \
                    "+d" (old__.high)                                   \
                  : "b" (new__.low),                                    \
   * accessed while this_cpu_read_stable() allows the value to be cached.
   * this_cpu_read_stable() is more efficient and can be used if its value
   * is guaranteed to be valid across cpus.  The current users include
 - * get_current() and get_thread_info() both of which are actually
 - * per-thread variables implemented as per-cpu variables and thus
 - * stable for the duration of the respective task.
 + * pcpu_hot.current_task and pcpu_hot.top_of_stack, both of which are
 + * actually per-thread variables implemented as per-CPU variables and
 + * thus stable for the duration of the respective task.
   */
  #define this_cpu_read_stable_1(pcp)   percpu_stable_op(1, "mov", pcp)
  #define this_cpu_read_stable_2(pcp)   percpu_stable_op(2, "mov", pcp)
  #define this_cpu_read_stable_8(pcp)   percpu_stable_op(8, "mov", pcp)
  #define this_cpu_read_stable(pcp)     __pcpu_size_call_return(this_cpu_read_stable_, pcp)
  
 +#ifdef CONFIG_USE_X86_SEG_SUPPORT
 +
 +#define __raw_cpu_read(qual, pcp)                                     \
 +({                                                                    \
 +      *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp));               \
 +})
 +
 +#define __raw_cpu_write(qual, pcp, val)                                       \
 +do {                                                                  \
 +      *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)) = (val);       \
 +} while (0)
 +
 +#define raw_cpu_read_1(pcp)           __raw_cpu_read(, pcp)
 +#define raw_cpu_read_2(pcp)           __raw_cpu_read(, pcp)
 +#define raw_cpu_read_4(pcp)           __raw_cpu_read(, pcp)
 +#define raw_cpu_write_1(pcp, val)     __raw_cpu_write(, pcp, val)
 +#define raw_cpu_write_2(pcp, val)     __raw_cpu_write(, pcp, val)
 +#define raw_cpu_write_4(pcp, val)     __raw_cpu_write(, pcp, val)
 +
 +#define this_cpu_read_1(pcp)          __raw_cpu_read(volatile, pcp)
 +#define this_cpu_read_2(pcp)          __raw_cpu_read(volatile, pcp)
 +#define this_cpu_read_4(pcp)          __raw_cpu_read(volatile, pcp)
 +#define this_cpu_write_1(pcp, val)    __raw_cpu_write(volatile, pcp, val)
 +#define this_cpu_write_2(pcp, val)    __raw_cpu_write(volatile, pcp, val)
 +#define this_cpu_write_4(pcp, val)    __raw_cpu_write(volatile, pcp, val)
 +
 +#ifdef CONFIG_X86_64
 +#define raw_cpu_read_8(pcp)           __raw_cpu_read(, pcp)
 +#define raw_cpu_write_8(pcp, val)     __raw_cpu_write(, pcp, val)
 +
 +#define this_cpu_read_8(pcp)          __raw_cpu_read(volatile, pcp)
 +#define this_cpu_write_8(pcp, val)    __raw_cpu_write(volatile, pcp, val)
 +#endif
 +
 +#define this_cpu_read_const(pcp)      __raw_cpu_read(, pcp)
 +#else /* CONFIG_USE_X86_SEG_SUPPORT */
 +
  #define raw_cpu_read_1(pcp)           percpu_from_op(1, , "mov", pcp)
  #define raw_cpu_read_2(pcp)           percpu_from_op(2, , "mov", pcp)
  #define raw_cpu_read_4(pcp)           percpu_from_op(4, , "mov", pcp)
 -
  #define raw_cpu_write_1(pcp, val)     percpu_to_op(1, , "mov", (pcp), val)
  #define raw_cpu_write_2(pcp, val)     percpu_to_op(2, , "mov", (pcp), val)
  #define raw_cpu_write_4(pcp, val)     percpu_to_op(4, , "mov", (pcp), val)
 +
 +#define this_cpu_read_1(pcp)          percpu_from_op(1, volatile, "mov", pcp)
 +#define this_cpu_read_2(pcp)          percpu_from_op(2, volatile, "mov", pcp)
 +#define this_cpu_read_4(pcp)          percpu_from_op(4, volatile, "mov", pcp)
 +#define this_cpu_write_1(pcp, val)    percpu_to_op(1, volatile, "mov", (pcp), val)
 +#define this_cpu_write_2(pcp, val)    percpu_to_op(2, volatile, "mov", (pcp), val)
 +#define this_cpu_write_4(pcp, val)    percpu_to_op(4, volatile, "mov", (pcp), val)
 +
 +#ifdef CONFIG_X86_64
 +#define raw_cpu_read_8(pcp)           percpu_from_op(8, , "mov", pcp)
 +#define raw_cpu_write_8(pcp, val)     percpu_to_op(8, , "mov", (pcp), val)
 +
 +#define this_cpu_read_8(pcp)          percpu_from_op(8, volatile, "mov", pcp)
 +#define this_cpu_write_8(pcp, val)    percpu_to_op(8, volatile, "mov", (pcp), val)
 +#endif
 +
 +/*
 + * The generic per-cpu infrastructure is not suitable for
 + * reading const-qualified variables.
 + */
 +#define this_cpu_read_const(pcp)      ({ BUILD_BUG(); (typeof(pcp))0; })
 +#endif /* CONFIG_USE_X86_SEG_SUPPORT */
 +
  #define raw_cpu_add_1(pcp, val)               percpu_add_op(1, , (pcp), val)
  #define raw_cpu_add_2(pcp, val)               percpu_add_op(2, , (pcp), val)
  #define raw_cpu_add_4(pcp, val)               percpu_add_op(4, , (pcp), val)
  #define raw_cpu_xchg_2(pcp, val)      raw_percpu_xchg_op(pcp, val)
  #define raw_cpu_xchg_4(pcp, val)      raw_percpu_xchg_op(pcp, val)
  
 -#define this_cpu_read_1(pcp)          percpu_from_op(1, volatile, "mov", pcp)
 -#define this_cpu_read_2(pcp)          percpu_from_op(2, volatile, "mov", pcp)
 -#define this_cpu_read_4(pcp)          percpu_from_op(4, volatile, "mov", pcp)
 -#define this_cpu_write_1(pcp, val)    percpu_to_op(1, volatile, "mov", (pcp), val)
 -#define this_cpu_write_2(pcp, val)    percpu_to_op(2, volatile, "mov", (pcp), val)
 -#define this_cpu_write_4(pcp, val)    percpu_to_op(4, volatile, "mov", (pcp), val)
  #define this_cpu_add_1(pcp, val)      percpu_add_op(1, volatile, (pcp), val)
  #define this_cpu_add_2(pcp, val)      percpu_add_op(2, volatile, (pcp), val)
  #define this_cpu_add_4(pcp, val)      percpu_add_op(4, volatile, (pcp), val)
   * 32 bit must fall back to generic operations.
   */
  #ifdef CONFIG_X86_64
 -#define raw_cpu_read_8(pcp)                   percpu_from_op(8, , "mov", pcp)
 -#define raw_cpu_write_8(pcp, val)             percpu_to_op(8, , "mov", (pcp), val)
  #define raw_cpu_add_8(pcp, val)                       percpu_add_op(8, , (pcp), val)
  #define raw_cpu_and_8(pcp, val)                       percpu_to_op(8, , "and", (pcp), val)
  #define raw_cpu_or_8(pcp, val)                        percpu_to_op(8, , "or", (pcp), val)
  #define raw_cpu_cmpxchg_8(pcp, oval, nval)    percpu_cmpxchg_op(8, , pcp, oval, nval)
  #define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval)       percpu_try_cmpxchg_op(8, , pcp, ovalp, nval)
  
 -#define this_cpu_read_8(pcp)                  percpu_from_op(8, volatile, "mov", pcp)
 -#define this_cpu_write_8(pcp, val)            percpu_to_op(8, volatile, "mov", (pcp), val)
  #define this_cpu_add_8(pcp, val)              percpu_add_op(8, volatile, (pcp), val)
  #define this_cpu_and_8(pcp, val)              percpu_to_op(8, volatile, "and", (pcp), val)
  #define this_cpu_or_8(pcp, val)                       percpu_to_op(8, volatile, "or", (pcp), val)
@@@ -601,7 -494,7 +601,7 @@@ static inline bool x86_this_cpu_variabl
        asm volatile("btl "__percpu_arg(2)",%1"
                        CC_SET(c)
                        : CC_OUT(c) (oldbit)
 -                      : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
 +                      : "m" (*__my_cpu_ptr((unsigned long __percpu *)(addr))), "Ir" (nr));
  
        return oldbit;
  }
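
For readers following the arch_raw_cpu_ptr() rework in the hunk above: the operation is simply "this CPU's per-CPU base plus the variable's per-CPU offset". A simplified sketch of that composition (this_cpu_base() is a hypothetical stand-in for the %gs-relative read of this_cpu_off, not a real kernel helper):

#include <stdint.h>

extern unsigned long this_cpu_base(void);	/* hypothetical: reads this_cpu_off */

static inline void *raw_cpu_ptr_sketch(void *pcpu_addr)
{
	/* Doing the addition in C, rather than inside the old
	 * "add %gs:this_cpu_off, %0" asm statement, lets the compiler fold
	 * the sum into the caller's addressing mode. */
	return (void *)(this_cpu_base() + (uintptr_t)pcpu_addr);
}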
diff --combined arch/x86/include/asm/preempt.h
index 4b2a35d8d56a837e38017f2331307fe767b5e12f,af77235fded63b64ec99844778796a294cf29d0a..919909d8cb77e3d630af36bf7df00f10fc3765f2
@@@ -6,7 -6,6 +6,6 @@@
  #include <asm/percpu.h>
  #include <asm/current.h>
  
- #include <linux/thread_info.h>
  #include <linux/static_call_types.h>
  
  /* We use the MSB mostly because its available */
@@@ -92,7 -91,7 +91,7 @@@ static __always_inline void __preempt_c
   */
  static __always_inline bool __preempt_count_dec_and_test(void)
  {
 -      return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e,
 +      return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.preempt_count), e,
                               __percpu_arg([var]));
  }
  
diff --combined arch/x86/include/asm/processor.h
index a94a857152c44506feb4d54b831182e9f6b37655,26620d7642a9fcf9d4a822140a1dd009399ee16a..1188e8bf76a2998d9d8cb50d0da0ae9a9d28351e
@@@ -75,11 -75,36 +75,36 @@@ extern u16 __read_mostly tlb_lld_4m[NR_
  extern u16 __read_mostly tlb_lld_1g[NR_INFO];
  
  /*
-  *  CPU type and hardware bug flags. Kept separately for each CPU.
-  *  Members of this structure are referenced in head_32.S, so think twice
-  *  before touching them. [mj]
+  * CPU type and hardware bug flags. Kept separately for each CPU.
   */
  
+ struct cpuinfo_topology {
+       // Real APIC ID read from the local APIC
+       u32                     apicid;
+       // The initial APIC ID provided by CPUID
+       u32                     initial_apicid;
+       // Physical package ID
+       u32                     pkg_id;
+       // Physical die ID on AMD, Relative on Intel
+       u32                     die_id;
+       // Compute unit ID - AMD specific
+       u32                     cu_id;
+       // Core ID relative to the package
+       u32                     core_id;
+       // Logical ID mappings
+       u32                     logical_pkg_id;
+       u32                     logical_die_id;
+       // Cache level topology IDs
+       u32                     llc_id;
+       u32                     l2c_id;
+ };
  struct cpuinfo_x86 {
        __u8                    x86;            /* CPU family */
        __u8                    x86_vendor;     /* CPU vendor */
        __u8                    x86_phys_bits;
        /* CPUID returned core id bits: */
        __u8                    x86_coreid_bits;
-       __u8                    cu_id;
        /* Max extended CPUID function supported: */
        __u32                   extended_cpuid_level;
        /* Maximum supported CPUID level, -1=no CPUID: */
        };
        char                    x86_vendor_id[16];
        char                    x86_model_id[64];
+       struct cpuinfo_topology topo;
        /* in KB - valid for CPUS which support this call: */
        unsigned int            x86_cache_size;
        int                     x86_cache_alignment;    /* In bytes */
        u64                     ppin;
        /* cpuid returned max cores value: */
        u16                     x86_max_cores;
-       u16                     apicid;
-       u16                     initial_apicid;
        u16                     x86_clflush_size;
        /* number of cores as seen by the OS: */
        u16                     booted_cores;
-       /* Physical processor id: */
-       u16                     phys_proc_id;
-       /* Logical processor id: */
-       u16                     logical_proc_id;
-       /* Core id: */
-       u16                     cpu_core_id;
-       u16                     cpu_die_id;
-       u16                     logical_die_id;
        /* Index into per_cpu list: */
        u16                     cpu_index;
        /*  Is SMT active on this core? */
@@@ -399,7 -414,7 +414,7 @@@ static inline unsigned long cpu_kernelm
        return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
  }
  
- extern asmlinkage void ignore_sysret(void);
+ extern asmlinkage void entry_SYSCALL32_ignore(void);
  
  /* Save actual FS/GS selectors and bases to current->thread */
  void current_save_fsgs(void);
@@@ -518,9 -533,6 +533,9 @@@ static __always_inline unsigned long cu
         *  and around vm86 mode and sp0 on x86_64 is special because of the
         *  entry trampoline.
         */
 +      if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
 +              return this_cpu_read_const(const_pcpu_hot.top_of_stack);
 +
        return this_cpu_read_stable(pcpu_hot.top_of_stack);
  }
  
@@@ -681,18 -693,24 +696,24 @@@ extern int set_tsc_mode(unsigned int va
  
  DECLARE_PER_CPU(u64, msr_misc_features_shadow);
  
- extern u16 get_llc_id(unsigned int cpu);
+ static inline u32 per_cpu_llc_id(unsigned int cpu)
+ {
+       return per_cpu(cpu_info.topo.llc_id, cpu);
+ }
+ static inline u32 per_cpu_l2c_id(unsigned int cpu)
+ {
+       return per_cpu(cpu_info.topo.l2c_id, cpu);
+ }
  
  #ifdef CONFIG_CPU_SUP_AMD
  extern u32 amd_get_nodes_per_socket(void);
  extern u32 amd_get_highest_perf(void);
- extern bool cpu_has_ibpb_brtype_microcode(void);
  extern void amd_clear_divider(void);
  extern void amd_check_microcode(void);
  #else
  static inline u32 amd_get_nodes_per_socket(void)      { return 0; }
  static inline u32 amd_get_highest_perf(void)          { return 0; }
- static inline bool cpu_has_ibpb_brtype_microcode(void)        { return false; }
  static inline void amd_clear_divider(void)            { }
  static inline void amd_check_microcode(void)          { }
  #endif
@@@ -729,14 -747,24 +750,24 @@@ enum mds_mitigations 
        MDS_MITIGATION_VMWERV,
  };
  
- #ifdef CONFIG_X86_SGX
- int arch_memory_failure(unsigned long pfn, int flags);
- #define arch_memory_failure arch_memory_failure
- bool arch_is_platform_page(u64 paddr);
- #define arch_is_platform_page arch_is_platform_page
- #endif
  extern bool gds_ucode_mitigated(void);
  
+ /*
+  * Make previous memory operations globally visible before
+  * a WRMSR.
+  *
+  * MFENCE makes writes visible, but only affects load/store
+  * instructions.  WRMSR is unfortunately not a load/store
+  * instruction and is unaffected by MFENCE.  The LFENCE ensures
+  * that the WRMSR is not reordered.
+  *
+  * Most WRMSRs are full serializing instructions themselves and
+  * do not require this barrier.  This is only required for the
+  * IA32_TSC_DEADLINE and X2APIC MSRs.
+  */
+ static inline void weak_wrmsr_fence(void)
+ {
+       alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE));
+ }
  #endif /* _ASM_X86_PROCESSOR_H */
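
The weak_wrmsr_fence() helper added in the hunk above is intended to be called right before writes to the non-serializing MSRs named in its comment. A minimal usage sketch, assuming kernel context (wrmsrl() and MSR_IA32_TSC_DEADLINE come from the wider tree, not from this merge):

static inline void arm_tsc_deadline_example(u64 tsc)
{
	/* Order earlier stores before the non-serializing WRMSR; on CPUs
	 * with X86_FEATURE_APIC_MSRS_FENCE the barrier is patched out. */
	weak_wrmsr_fence();
	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc);
}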
diff --combined arch/x86/include/asm/text-patching.h
index fb338f00d38343bc7bc1c380245756ba75fc53b4,0b70653a98c1573a475edf808601cb1c33f9a45e..345aafbc19648865f7262b64e43d1ae5176aae4c
@@@ -6,20 -6,6 +6,6 @@@
  #include <linux/stddef.h>
  #include <asm/ptrace.h>
  
- struct paravirt_patch_site;
- #ifdef CONFIG_PARAVIRT
- void apply_paravirt(struct paravirt_patch_site *start,
-                   struct paravirt_patch_site *end);
- #else
- static inline void apply_paravirt(struct paravirt_patch_site *start,
-                                 struct paravirt_patch_site *end)
- {}
- #define __parainstructions    NULL
- #define __parainstructions_end        NULL
- #endif
- extern void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len);
  /*
   * Currently, the max observed size in the kernel code is
   * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5.
@@@ -29,6 -15,6 +15,8 @@@
  
  extern void text_poke_early(void *addr, const void *opcode, size_t len);
  
++extern void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len);
++
  /*
   * Clear and restore the kernel write-protection flag on the local CPU.
   * Allows the kernel to edit read-only pages.
diff --combined arch/x86/kernel/alternative.c
index 1781e020f393fc46f73ba1c8f42e04f91f202ab3,1d85cb7071cb21c84899477ec4a150d2fcc4da43..e7aeae02aacaf6a63deb7ac065fc1bbca5e049a3
@@@ -30,6 -30,7 +30,7 @@@
  #include <asm/fixmap.h>
  #include <asm/paravirt.h>
  #include <asm/asm-prototypes.h>
+ #include <asm/cfi.h>
  
  int __read_mostly alternatives_patched;
  
@@@ -44,7 -45,7 +45,7 @@@ EXPORT_SYMBOL_GPL(alternatives_patched)
  #define DA_ENDBR      0x08
  #define DA_SMP                0x10
  
 -static unsigned int __initdata_or_module debug_alternative;
 +static unsigned int debug_alternative;
  
  static int __init debug_alt(char *str)
  {
@@@ -132,7 -133,7 +133,7 @@@ const unsigned char * const x86_nops[AS
   * each single-byte NOPs). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and
   * *jump* over instead of executing long and daft NOPs.
   */
 -static void __init_or_module add_nop(u8 *instr, unsigned int len)
 +static void add_nop(u8 *instr, unsigned int len)
  {
        u8 *target = instr + len;
  
@@@ -160,7 -161,6 +161,6 @@@ extern s32 __retpoline_sites[], __retpo
  extern s32 __return_sites[], __return_sites_end[];
  extern s32 __cfi_sites[], __cfi_sites_end[];
  extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
- extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
  extern s32 __smp_locks[], __smp_locks_end[];
  void text_poke_early(void *addr, const void *opcode, size_t len);
  
@@@ -206,7 -206,7 +206,7 @@@ static int skip_nops(u8 *instr, int off
   * Optimize a sequence of NOPs, possibly preceded by an unconditional jump
   * to the end of the NOP sequence into a single NOP.
   */
 -static bool __init_or_module
 +static bool
  __optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev, int *target)
  {
        int i = *next - insn->length;
@@@ -255,6 -255,16 +255,16 @@@ static void __init_or_module noinline o
        }
  }
  
+ static void __init_or_module noinline optimize_nops_inplace(u8 *instr, size_t len)
+ {
+       unsigned long flags;
+       local_irq_save(flags);
+       optimize_nops(instr, len);
+       sync_core();
+       local_irq_restore(flags);
+ }
  /*
   * In this context, "source" is where the instructions are placed in the
   * section .altinstr_replacement, for example during kernel build by the
@@@ -325,7 -335,8 +335,7 @@@ bool need_reloc(unsigned long offset, u
        return (target < src || target > src + src_len);
  }
  
 -static void __init_or_module noinline
 -apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
 +void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
  {
        int prev, target = 0;
  
        }
  }
  
+ /* Low-level backend functions usable from alternative code replacements. */
+ DEFINE_ASM_FUNC(nop_func, "", .entry.text);
+ EXPORT_SYMBOL_GPL(nop_func);
+ noinstr void BUG_func(void)
+ {
+       BUG();
+ }
+ EXPORT_SYMBOL(BUG_func);
+ #define CALL_RIP_REL_OPCODE   0xff
+ #define CALL_RIP_REL_MODRM    0x15
+ /*
+  * Rewrite the "call BUG_func" replacement to point to the target of the
+  * indirect pv_ops call "call *disp(%ip)".
+  */
+ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
+ {
+       void *target, *bug = &BUG_func;
+       s32 disp;
+       if (a->replacementlen != 5 || insn_buff[0] != CALL_INSN_OPCODE) {
+               pr_err("ALT_FLAG_DIRECT_CALL set for a non-call replacement instruction\n");
+               BUG();
+       }
+       if (a->instrlen != 6 ||
+           instr[0] != CALL_RIP_REL_OPCODE ||
+           instr[1] != CALL_RIP_REL_MODRM) {
+               pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n");
+               BUG();
+       }
+       /* Skip CALL_RIP_REL_OPCODE and CALL_RIP_REL_MODRM */
+       disp = *(s32 *)(instr + 2);
+ #ifdef CONFIG_X86_64
+       /* ff 15 00 00 00 00   call   *0x0(%rip) */
+       /* target address is stored at "next instruction + disp". */
+       target = *(void **)(instr + a->instrlen + disp);
+ #else
+       /* ff 15 00 00 00 00   call   *0x0 */
+       /* target address is stored at disp. */
+       target = *(void **)disp;
+ #endif
+       if (!target)
+               target = bug;
+       /* (BUG_func - .) + (target - BUG_func) := target - . */
+       *(s32 *)(insn_buff + 1) += target - bug;
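+       /* A call to nop_func can be dropped entirely; returning 0 lets the caller pad the site with NOPs. */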
+       if (target == &nop_func)
+               return 0;
+       return 5;
+ }
  /*
   * Replace instructions with better alternatives for this CPU type. This runs
   * before SMP is initialized to avoid SMP problems with self modifying code.
@@@ -402,6 -470,17 +469,17 @@@ void __init_or_module noinline apply_al
        u8 insn_buff[MAX_PATCH_LEN];
  
        DPRINTK(ALT, "alt table %px, -> %px", start, end);
+       /*
+        * In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using
+        * cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here.
+        * During the process, KASAN becomes confused seeing partial LA57
+        * conversion and triggers a false-positive out-of-bounds report.
+        *
+        * Disable KASAN until the patching is complete.
+        */
+       kasan_disable_current();
        /*
         * The scan order should be from start to end. A later scanned
         * alternative code can overwrite previously scanned alternative code.
                 *   patch if feature is *NOT* present.
                 */
                if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
-                       optimize_nops(instr, a->instrlen);
+                       optimize_nops_inplace(instr, a->instrlen);
                        continue;
                }
  
-               DPRINTK(ALT, "feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
-                       (a->flags & ALT_FLAG_NOT) ? "!" : "",
+               DPRINTK(ALT, "feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d) flags: 0x%x",
                        a->cpuid >> 5,
                        a->cpuid & 0x1f,
                        instr, instr, a->instrlen,
-                       replacement, a->replacementlen);
+                       replacement, a->replacementlen, a->flags);
  
                memcpy(insn_buff, replacement, a->replacementlen);
                insn_buff_sz = a->replacementlen;
  
+               if (a->flags & ALT_FLAG_DIRECT_CALL) {
+                       insn_buff_sz = alt_replace_call(instr, insn_buff, a);
+                       if (insn_buff_sz < 0)
+                               continue;
+               }
                for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
                        insn_buff[insn_buff_sz] = 0x90;
  
  
                text_poke_early(instr, insn_buff, insn_buff_sz);
        }
+       kasan_enable_current();
  }
  
  static inline bool is_jcc32(struct insn *insn)
@@@ -719,13 -805,8 +804,8 @@@ void __init_or_module noinline apply_re
  {
        s32 *s;
  
-       /*
-        * Do not patch out the default return thunks if those needed are the
-        * ones generated by the compiler.
-        */
-       if (cpu_feature_enabled(X86_FEATURE_RETHUNK) &&
-           (x86_return_thunk == __x86_return_thunk))
-               return;
+       if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+               static_call_force_reinit();
  
        for (s = start; s < end; s++) {
                void *dest = NULL, *addr = (void *)s + *s;
@@@ -823,15 -904,82 +903,82 @@@ void __init_or_module apply_seal_endbr(
  #endif /* CONFIG_X86_KERNEL_IBT */
  
  #ifdef CONFIG_FINEIBT
+ #define __CFI_DEFAULT CFI_DEFAULT
+ #elif defined(CONFIG_CFI_CLANG)
+ #define __CFI_DEFAULT CFI_KCFI
+ #else
+ #define __CFI_DEFAULT CFI_OFF
+ #endif
  
- enum cfi_mode {
-       CFI_DEFAULT,
-       CFI_OFF,
-       CFI_KCFI,
-       CFI_FINEIBT,
- };
+ enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT;
+ #ifdef CONFIG_CFI_CLANG
+ struct bpf_insn;
+ /* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */
+ extern unsigned int __bpf_prog_runX(const void *ctx,
+                                   const struct bpf_insn *insn);
+ /*
+  * Force a reference to the external symbol so the compiler generates
+  * __kcfi_typeid.
+  */
+ __ADDRESSABLE(__bpf_prog_runX);
+ /* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */
+ asm (
+ "     .pushsection    .data..ro_after_init,\"aw\",@progbits   \n"
+ "     .type   cfi_bpf_hash,@object                            \n"
+ "     .globl  cfi_bpf_hash                                    \n"
+ "     .p2align        2, 0x0                                  \n"
+ "cfi_bpf_hash:                                                        \n"
+ "     .long   __kcfi_typeid___bpf_prog_runX                   \n"
+ "     .size   cfi_bpf_hash, 4                                 \n"
+ "     .popsection                                             \n"
+ );
+ /* Must match bpf_callback_t */
+ extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64);
+ __ADDRESSABLE(__bpf_callback_fn);
+ /* u32 __ro_after_init cfi_bpf_subprog_hash = __kcfi_typeid___bpf_callback_fn; */
+ asm (
+ "     .pushsection    .data..ro_after_init,\"aw\",@progbits   \n"
+ "     .type   cfi_bpf_subprog_hash,@object                    \n"
+ "     .globl  cfi_bpf_subprog_hash                            \n"
+ "     .p2align        2, 0x0                                  \n"
+ "cfi_bpf_subprog_hash:                                                \n"
+ "     .long   __kcfi_typeid___bpf_callback_fn                 \n"
+ "     .size   cfi_bpf_subprog_hash, 4                         \n"
+ "     .popsection                                             \n"
+ );
+ u32 cfi_get_func_hash(void *func)
+ {
+       u32 hash;
+       func -= cfi_get_offset();
+       switch (cfi_mode) {
+       case CFI_FINEIBT:
+               func += 7;
+               break;
+       case CFI_KCFI:
+               func += 1;
+               break;
+       default:
+               return 0;
+       }
+       if (get_kernel_nofault(hash, func))
+               return 0;
+       return hash;
+ }
+ #endif
+ #ifdef CONFIG_FINEIBT
  
- static enum cfi_mode cfi_mode __ro_after_init = CFI_DEFAULT;
  static bool cfi_rand __ro_after_init = true;
  static u32  cfi_seed __ro_after_init;
  
@@@ -1140,8 -1288,11 +1287,11 @@@ static void __apply_fineibt(s32 *start_
                goto err;
  
        if (cfi_rand) {
-               if (builtin)
+               if (builtin) {
                        cfi_seed = get_random_u32();
+                       cfi_bpf_hash = cfi_rehash(cfi_bpf_hash);
+                       cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash);
+               }
  
                ret = cfi_rand_preamble(start_cfi, end_cfi);
                if (ret)
@@@ -1402,46 -1553,6 +1552,6 @@@ int alternatives_text_reserved(void *st
  }
  #endif /* CONFIG_SMP */
  
- #ifdef CONFIG_PARAVIRT
- /* Use this to add nops to a buffer, then text_poke the whole buffer. */
- static void __init_or_module add_nops(void *insns, unsigned int len)
- {
-       while (len > 0) {
-               unsigned int noplen = len;
-               if (noplen > ASM_NOP_MAX)
-                       noplen = ASM_NOP_MAX;
-               memcpy(insns, x86_nops[noplen], noplen);
-               insns += noplen;
-               len -= noplen;
-       }
- }
- void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
-                                    struct paravirt_patch_site *end)
- {
-       struct paravirt_patch_site *p;
-       char insn_buff[MAX_PATCH_LEN];
-       for (p = start; p < end; p++) {
-               unsigned int used;
-               BUG_ON(p->len > MAX_PATCH_LEN);
-               /* prep the buffer with the original instructions */
-               memcpy(insn_buff, p->instr, p->len);
-               used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
-               BUG_ON(used > p->len);
-               /* Pad the rest with nops */
-               add_nops(insn_buff + used, p->len - used);
-               text_poke_early(p->instr, insn_buff, p->len);
-       }
- }
- extern struct paravirt_patch_site __start_parainstructions[],
-       __stop_parainstructions[];
- #endif        /* CONFIG_PARAVIRT */
  /*
   * Self-test for the INT3 based CALL emulation code.
   *
@@@ -1577,28 -1688,11 +1687,11 @@@ void __init alternative_instructions(vo
         */
  
        /*
-        * Paravirt patching and alternative patching can be combined to
-        * replace a function call with a short direct code sequence (e.g.
-        * by setting a constant return value instead of doing that in an
-        * external function).
-        * In order to make this work the following sequence is required:
-        * 1. set (artificial) features depending on used paravirt
-        *    functions which can later influence alternative patching
-        * 2. apply paravirt patching (generally replacing an indirect
-        *    function call with a direct one)
-        * 3. apply alternative patching (e.g. replacing a direct function
-        *    call with a custom code sequence)
-        * Doing paravirt patching after alternative patching would clobber
-        * the optimization of the custom code with a function call again.
+        * Make sure to set (artificial) features depending on used paravirt
+        * functions which can later influence alternative patching.
         */
        paravirt_set_cap();
  
-       /*
-        * First patch paravirt functions, such that we overwrite the indirect
-        * call with the direct call.
-        */
-       apply_paravirt(__parainstructions, __parainstructions_end);
        __apply_fineibt(__retpoline_sites, __retpoline_sites_end,
                        __cfi_sites, __cfi_sites_end, true);
  
        apply_retpolines(__retpoline_sites, __retpoline_sites_end);
        apply_returns(__return_sites, __return_sites_end);
  
-       /*
-        * Then patch alternatives, such that those paravirt calls that are in
-        * alternatives can be overwritten by their immediate fragments.
-        */
        apply_alternatives(__alt_instructions, __alt_instructions_end);
  
        /*
@@@ -1676,8 -1766,8 +1765,8 @@@ void __init_or_module text_poke_early(v
        } else {
                local_irq_save(flags);
                memcpy(addr, opcode, len);
-               local_irq_restore(flags);
                sync_core();
+               local_irq_restore(flags);
  
                /*
                 * Could also do a CLFLUSH here to speed up CPU recovery; but
@@@ -1887,7 -1977,7 +1976,7 @@@ static void *__text_poke(text_poke_f fu
   * Note that the caller must ensure that if the modified code is part of a
   * module, the module would not be removed during poking. This can be achieved
   * by registering a module notifier, and ordering module removal and patching
-  * trough a mutex.
+  * through a mutex.
   */
  void *text_poke(void *addr, const void *opcode, size_t len)
  {
index 2324c7f9a84131829ee01697041e8c0670174bf2,64ad2ddea121940a99a8c55746986ef0b84c1c41..cf7e5be1b844b2316aab120b21e41b0831dac2b4
@@@ -24,8 -24,6 +24,8 @@@
  
  static int __initdata_or_module debug_callthunks;
  
 +#define MAX_PATCH_LEN (255-1)
 +
  #define prdbg(fmt, args...)                                   \
  do {                                                          \
        if (debug_callthunks)                                   \
@@@ -50,11 -48,6 +50,6 @@@ EXPORT_SYMBOL_GPL(__x86_call_count)
  
  extern s32 __call_sites[], __call_sites_end[];
  
- struct thunk_desc {
-       void            *template;
-       unsigned int    template_size;
- };
  struct core_text {
        unsigned long   base;
        unsigned long   end;
@@@ -186,15 -179,10 +181,15 @@@ static const u8 nops[] = 
  static void *patch_dest(void *dest, bool direct)
  {
        unsigned int tsize = SKL_TMPL_SIZE;
 +      u8 insn_buff[MAX_PATCH_LEN];
        u8 *pad = dest - tsize;
  
 +      memcpy(insn_buff, skl_call_thunk_template, tsize);
 +      apply_relocation(insn_buff, tsize, pad,
 +                       skl_call_thunk_template, tsize);
 +
        /* Already patched? */
 -      if (!bcmp(pad, skl_call_thunk_template, tsize))
 +      if (!bcmp(pad, insn_buff, tsize))
                return pad;
  
        /* Ensure there are nops */
        }
  
        if (direct)
 -              memcpy(pad, skl_call_thunk_template, tsize);
 +              memcpy(pad, insn_buff, tsize);
        else
 -              text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true);
 +              text_poke_copy_locked(pad, insn_buff, tsize, true);
        return pad;
  }
  
@@@ -245,14 -233,13 +240,13 @@@ patch_call_sites(s32 *start, s32 *end, 
  }
  
  static __init_or_module void
- patch_paravirt_call_sites(struct paravirt_patch_site *start,
-                         struct paravirt_patch_site *end,
-                         const struct core_text *ct)
+ patch_alt_call_sites(struct alt_instr *start, struct alt_instr *end,
+                    const struct core_text *ct)
  {
-       struct paravirt_patch_site *p;
+       struct alt_instr *a;
  
-       for (p = start; p < end; p++)
-               patch_call(p->instr, ct);
+       for (a = start; a < end; a++)
+               patch_call((void *)&a->instr_offset + a->instr_offset, ct);
  }
  
  static __init_or_module void
@@@ -260,7 -247,7 +254,7 @@@ callthunks_setup(struct callthunk_site
  {
        prdbg("Patching call sites %s\n", ct->name);
        patch_call_sites(cs->call_start, cs->call_end, ct);
-       patch_paravirt_call_sites(cs->pv_start, cs->pv_end, ct);
+       patch_alt_call_sites(cs->alt_start, cs->alt_end, ct);
        prdbg("Patching call sites done%s\n", ct->name);
  }
  
@@@ -269,8 -256,8 +263,8 @@@ void __init callthunks_patch_builtin_ca
        struct callthunk_sites cs = {
                .call_start     = __call_sites,
                .call_end       = __call_sites_end,
-               .pv_start       = __parainstructions,
-               .pv_end         = __parainstructions_end
+               .alt_start      = __alt_instructions,
+               .alt_end        = __alt_instructions_end
        };
  
        if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
        pr_info("Setting up call depth tracking\n");
        mutex_lock(&text_mutex);
        callthunks_setup(&cs, &builtin_coretext);
-       static_call_force_reinit();
        thunks_initialized = true;
        mutex_unlock(&text_mutex);
  }
@@@ -304,27 -290,20 +297,27 @@@ void *callthunks_translate_call_dest(vo
  static bool is_callthunk(void *addr)
  {
        unsigned int tmpl_size = SKL_TMPL_SIZE;
 -      void *tmpl = skl_call_thunk_template;
 +      u8 insn_buff[MAX_PATCH_LEN];
        unsigned long dest;
 +      u8 *pad;
  
        dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
        if (!thunks_initialized || skip_addr((void *)dest))
                return false;
  
 -      return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
 +      pad = (void *)(dest - tmpl_size);
 +
 +      memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
 +      apply_relocation(insn_buff, tmpl_size, pad,
 +                       skl_call_thunk_template, tmpl_size);
 +
 +      return !bcmp(pad, insn_buff, tmpl_size);
  }
  
  int x86_call_depth_emit_accounting(u8 **pprog, void *func)
  {
        unsigned int tmpl_size = SKL_TMPL_SIZE;
 -      void *tmpl = skl_call_thunk_template;
 +      u8 insn_buff[MAX_PATCH_LEN];
  
        if (!thunks_initialized)
                return 0;
        if (func && is_callthunk(func))
                return 0;
  
 -      memcpy(*pprog, tmpl, tmpl_size);
 +      memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
 +      apply_relocation(insn_buff, tmpl_size, *pprog,
 +                       skl_call_thunk_template, tmpl_size);
 +
 +      memcpy(*pprog, insn_buff, tmpl_size);
        *pprog += tmpl_size;
        return tmpl_size;
  }
index 4cc0ab0dfbb54435595eeac063c3cb512db4a099,0b97bcde70c6102a4b82b561c3256ec53b614770..8f367d3765208c215c3ad1f560ad5121295cf1e7
  #include <asm/intel-family.h>
  #include <asm/cpu_device_id.h>
  #include <asm/uv/uv.h>
+ #include <asm/ia32.h>
  #include <asm/set_memory.h>
  #include <asm/traps.h>
  #include <asm/sev.h>
+ #include <asm/tdx.h>
  
  #include "cpu.h"
  
@@@ -74,18 -76,6 +76,6 @@@ u32 elf_hwcap2 __read_mostly
  int smp_num_siblings = 1;
  EXPORT_SYMBOL(smp_num_siblings);
  
- /* Last level cache ID of each logical CPU */
- DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
- u16 get_llc_id(unsigned int cpu)
- {
-       return per_cpu(cpu_llc_id, cpu);
- }
- EXPORT_SYMBOL_GPL(get_llc_id);
- /* L2 cache ID of each logical CPU */
- DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID;
  static struct ppin_info {
        int     feature;
        int     msr_ppin_ctl;
@@@ -199,45 -189,37 +189,37 @@@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_
         * TLS descriptors are currently at a different place compared to i386.
         * Hopefully nobody expects them at a fixed place (Wine?)
         */
-       [GDT_ENTRY_KERNEL32_CS]         = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
-       [GDT_ENTRY_KERNEL_CS]           = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
-       [GDT_ENTRY_KERNEL_DS]           = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
-       [GDT_ENTRY_DEFAULT_USER32_CS]   = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
-       [GDT_ENTRY_DEFAULT_USER_DS]     = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
-       [GDT_ENTRY_DEFAULT_USER_CS]     = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
+       [GDT_ENTRY_KERNEL32_CS]         = GDT_ENTRY_INIT(DESC_CODE32, 0, 0xfffff),
+       [GDT_ENTRY_KERNEL_CS]           = GDT_ENTRY_INIT(DESC_CODE64, 0, 0xfffff),
+       [GDT_ENTRY_KERNEL_DS]           = GDT_ENTRY_INIT(DESC_DATA64, 0, 0xfffff),
+       [GDT_ENTRY_DEFAULT_USER32_CS]   = GDT_ENTRY_INIT(DESC_CODE32 | DESC_USER, 0, 0xfffff),
+       [GDT_ENTRY_DEFAULT_USER_DS]     = GDT_ENTRY_INIT(DESC_DATA64 | DESC_USER, 0, 0xfffff),
+       [GDT_ENTRY_DEFAULT_USER_CS]     = GDT_ENTRY_INIT(DESC_CODE64 | DESC_USER, 0, 0xfffff),
  #else
-       [GDT_ENTRY_KERNEL_CS]           = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
-       [GDT_ENTRY_KERNEL_DS]           = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
-       [GDT_ENTRY_DEFAULT_USER_CS]     = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
-       [GDT_ENTRY_DEFAULT_USER_DS]     = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
+       [GDT_ENTRY_KERNEL_CS]           = GDT_ENTRY_INIT(DESC_CODE32, 0, 0xfffff),
+       [GDT_ENTRY_KERNEL_DS]           = GDT_ENTRY_INIT(DESC_DATA32, 0, 0xfffff),
+       [GDT_ENTRY_DEFAULT_USER_CS]     = GDT_ENTRY_INIT(DESC_CODE32 | DESC_USER, 0, 0xfffff),
+       [GDT_ENTRY_DEFAULT_USER_DS]     = GDT_ENTRY_INIT(DESC_DATA32 | DESC_USER, 0, 0xfffff),
        /*
         * Segments used for calling PnP BIOS have byte granularity.
         * The code segments and data segments have fixed 64k limits,
         * the transfer segment sizes are set at run time.
         */
-       /* 32-bit code */
-       [GDT_ENTRY_PNPBIOS_CS32]        = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
-       /* 16-bit code */
-       [GDT_ENTRY_PNPBIOS_CS16]        = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
-       /* 16-bit data */
-       [GDT_ENTRY_PNPBIOS_DS]          = GDT_ENTRY_INIT(0x0092, 0, 0xffff),
-       /* 16-bit data */
-       [GDT_ENTRY_PNPBIOS_TS1]         = GDT_ENTRY_INIT(0x0092, 0, 0),
-       /* 16-bit data */
-       [GDT_ENTRY_PNPBIOS_TS2]         = GDT_ENTRY_INIT(0x0092, 0, 0),
+       [GDT_ENTRY_PNPBIOS_CS32]        = GDT_ENTRY_INIT(DESC_CODE32_BIOS, 0, 0xffff),
+       [GDT_ENTRY_PNPBIOS_CS16]        = GDT_ENTRY_INIT(DESC_CODE16, 0, 0xffff),
+       [GDT_ENTRY_PNPBIOS_DS]          = GDT_ENTRY_INIT(DESC_DATA16, 0, 0xffff),
+       [GDT_ENTRY_PNPBIOS_TS1]         = GDT_ENTRY_INIT(DESC_DATA16, 0, 0),
+       [GDT_ENTRY_PNPBIOS_TS2]         = GDT_ENTRY_INIT(DESC_DATA16, 0, 0),
        /*
         * The APM segments have byte granularity and their bases
         * are set at run time.  All have 64k limits.
         */
-       /* 32-bit code */
-       [GDT_ENTRY_APMBIOS_BASE]        = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
-       /* 16-bit code */
-       [GDT_ENTRY_APMBIOS_BASE+1]      = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
-       /* data */
-       [GDT_ENTRY_APMBIOS_BASE+2]      = GDT_ENTRY_INIT(0x4092, 0, 0xffff),
-       [GDT_ENTRY_ESPFIX_SS]           = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
-       [GDT_ENTRY_PERCPU]              = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
+       [GDT_ENTRY_APMBIOS_BASE]        = GDT_ENTRY_INIT(DESC_CODE32_BIOS, 0, 0xffff),
+       [GDT_ENTRY_APMBIOS_BASE+1]      = GDT_ENTRY_INIT(DESC_CODE16, 0, 0xffff),
+       [GDT_ENTRY_APMBIOS_BASE+2]      = GDT_ENTRY_INIT(DESC_DATA32_BIOS, 0, 0xffff),
+       [GDT_ENTRY_ESPFIX_SS]           = GDT_ENTRY_INIT(DESC_DATA32, 0, 0xfffff),
+       [GDT_ENTRY_PERCPU]              = GDT_ENTRY_INIT(DESC_DATA32, 0, 0xfffff),
  #endif
  } };
  EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
@@@ -914,7 -896,7 +896,7 @@@ void detect_ht(struct cpuinfo_x86 *c
                return;
  
        index_msb = get_count_order(smp_num_siblings);
-       c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
+       c->topo.pkg_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb);
  
        smp_num_siblings = smp_num_siblings / c->x86_max_cores;
  
  
        core_bits = get_count_order(c->x86_max_cores);
  
-       c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
-                                      ((1 << core_bits) - 1);
+       c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb) &
+               ((1 << core_bits) - 1);
  #endif
  }
  
@@@ -1114,18 -1096,34 +1096,34 @@@ void get_cpu_cap(struct cpuinfo_x86 *c
  void get_cpu_address_sizes(struct cpuinfo_x86 *c)
  {
        u32 eax, ebx, ecx, edx;
+       bool vp_bits_from_cpuid = true;
  
-       if (c->extended_cpuid_level >= 0x80000008) {
+       if (!cpu_has(c, X86_FEATURE_CPUID) ||
+           (c->extended_cpuid_level < 0x80000008))
+               vp_bits_from_cpuid = false;
+       if (vp_bits_from_cpuid) {
                cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
  
                c->x86_virt_bits = (eax >> 8) & 0xff;
                c->x86_phys_bits = eax & 0xff;
+       } else {
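+               /* Without usable CPUID info, fall back to conservative default address sizes. */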
+               if (IS_ENABLED(CONFIG_X86_64)) {
+                       c->x86_clflush_size = 64;
+                       c->x86_phys_bits = 36;
+                       c->x86_virt_bits = 48;
+               } else {
+                       c->x86_clflush_size = 32;
+                       c->x86_virt_bits = 32;
+                       c->x86_phys_bits = 32;
+                       if (cpu_has(c, X86_FEATURE_PAE) ||
+                           cpu_has(c, X86_FEATURE_PSE36))
+                               c->x86_phys_bits = 36;
+               }
        }
- #ifdef CONFIG_X86_32
-       else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
-               c->x86_phys_bits = 36;
- #endif
        c->x86_cache_bits = c->x86_phys_bits;
+       c->x86_cache_alignment = c->x86_clflush_size;
  }
  
  static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@@@ -1303,7 -1301,7 +1301,7 @@@ static const struct x86_cpu_id cpu_vuln
        VULNBL_AMD(0x15, RETBLEED),
        VULNBL_AMD(0x16, RETBLEED),
        VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
-       VULNBL_HYGON(0x18, RETBLEED | SMT_RSB),
+       VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO),
        VULNBL_AMD(0x19, SRSO),
        {}
  };
@@@ -1579,17 -1577,6 +1577,6 @@@ static void __init cpu_parse_early_para
   */
  static void __init early_identify_cpu(struct cpuinfo_x86 *c)
  {
- #ifdef CONFIG_X86_64
-       c->x86_clflush_size = 64;
-       c->x86_phys_bits = 36;
-       c->x86_virt_bits = 48;
- #else
-       c->x86_clflush_size = 32;
-       c->x86_phys_bits = 32;
-       c->x86_virt_bits = 32;
- #endif
-       c->x86_cache_alignment = c->x86_clflush_size;
        memset(&c->x86_capability, 0, sizeof(c->x86_capability));
        c->extended_cpuid_level = 0;
  
                cpu_detect(c);
                get_cpu_vendor(c);
                get_cpu_cap(c);
-               get_cpu_address_sizes(c);
                setup_force_cpu_cap(X86_FEATURE_CPUID);
                cpu_parse_early_param();
  
                setup_clear_cpu_cap(X86_FEATURE_CPUID);
        }
  
+       get_cpu_address_sizes(c);
        setup_force_cpu_cap(X86_FEATURE_ALWAYS);
  
        cpu_set_bug_bits(c);
@@@ -1761,15 -1749,15 +1749,15 @@@ static void generic_identify(struct cpu
        get_cpu_address_sizes(c);
  
        if (c->cpuid_level >= 0x00000001) {
-               c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
+               c->topo.initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
  #ifdef CONFIG_X86_32
  # ifdef CONFIG_SMP
-               c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
+               c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0);
  # else
-               c->apicid = c->initial_apicid;
+               c->topo.apicid = c->topo.initial_apicid;
  # endif
  #endif
-               c->phys_proc_id = c->initial_apicid;
+               c->topo.pkg_id = c->topo.initial_apicid;
        }
  
        get_model_name(c); /* Default name */
  static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
  {
  #ifdef CONFIG_SMP
-       unsigned int apicid, cpu = smp_processor_id();
+       unsigned int cpu = smp_processor_id();
+       u32 apicid;
  
        apicid = apic->cpu_present_to_apicid(cpu);
  
-       if (apicid != c->apicid) {
+       if (apicid != c->topo.apicid) {
                pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n",
-                      cpu, apicid, c->initial_apicid);
+                      cpu, apicid, c->topo.initial_apicid);
        }
-       BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
-       BUG_ON(topology_update_die_map(c->cpu_die_id, cpu));
+       BUG_ON(topology_update_package_map(c->topo.pkg_id, cpu));
+       BUG_ON(topology_update_die_map(c->topo.die_id, cpu));
  #else
-       c->logical_proc_id = 0;
+       c->topo.logical_pkg_id = 0;
  #endif
  }
  
@@@ -1829,7 -1818,9 +1818,9 @@@ static void identify_cpu(struct cpuinfo
        c->x86_model_id[0] = '\0';  /* Unset */
        c->x86_max_cores = 1;
        c->x86_coreid_bits = 0;
-       c->cu_id = 0xff;
+       c->topo.cu_id = 0xff;
+       c->topo.llc_id = BAD_APICID;
+       c->topo.l2c_id = BAD_APICID;
  #ifdef CONFIG_X86_64
        c->x86_clflush_size = 64;
        c->x86_phys_bits = 36;
        apply_forced_caps(c);
  
  #ifdef CONFIG_X86_64
-       c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
+       c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0);
  #endif
  
+       /*
+        * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and
+        * Hygon will clear it in ->c_init() below.
+        */
+       set_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
        /*
         * Vendor-specific initialization.  In this section we
         * canonicalize the feature flags, meaning if there are
@@@ -1989,6 -1987,7 +1987,7 @@@ static __init void identify_boot_cpu(vo
        setup_cr_pinning();
  
        tsx_init();
+       tdx_init();
        lkgs_init();
  }
  
@@@ -2051,7 -2050,6 +2050,7 @@@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot
        .top_of_stack   = TOP_OF_INIT_STACK,
  };
  EXPORT_PER_CPU_SYMBOL(pcpu_hot);
 +EXPORT_PER_CPU_SYMBOL(const_pcpu_hot);
  
  #ifdef CONFIG_X86_64
  DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
@@@ -2075,24 -2073,24 +2074,24 @@@ void syscall_init(void
        wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
        wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
  
- #ifdef CONFIG_IA32_EMULATION
-       wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
-       /*
-        * This only works on Intel CPUs.
-        * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
-        * This does not cause SYSENTER to jump to the wrong location, because
-        * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
-        */
-       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-       wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
-                   (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
-       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
- #else
-       wrmsrl_cstar((unsigned long)ignore_sysret);
-       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
-       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
-       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
- #endif
+       if (ia32_enabled()) {
+               wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
+               /*
+                * This only works on Intel CPUs.
+                * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
+                * This does not cause SYSENTER to jump to the wrong location, because
+                * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+                */
+               wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+               wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+                           (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
+               wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+       } else {
+               wrmsrl_cstar((unsigned long)entry_SYSCALL32_ignore);
+               wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
+               wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+               wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
+       }
  
        /*
         * Flags to clear on syscall; clear as much as possible
@@@ -2167,8 -2165,6 +2166,6 @@@ static inline void setup_getcpu(int cpu
  }
  
  #ifdef CONFIG_X86_64
- static inline void ucode_cpu_init(int cpu) { }
  static inline void tss_setup_ist(struct tss_struct *tss)
  {
        /* Set up the per-CPU TSS IST stacks */
        /* Only mapped when SEV-ES is active */
        tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC);
  }
  #else /* CONFIG_X86_64 */
- static inline void ucode_cpu_init(int cpu)
- {
-       show_ucode_info_early();
- }
  static inline void tss_setup_ist(struct tss_struct *tss) { }
  #endif /* !CONFIG_X86_64 */
  
  static inline void tss_setup_io_bitmap(struct tss_struct *tss)
@@@ -2244,8 -2232,6 +2233,6 @@@ void cpu_init(void
        struct task_struct *cur = current;
        int cpu = raw_smp_processor_id();
  
-       ucode_cpu_init(cpu);
  #ifdef CONFIG_NUMA
        if (this_cpu_read(numa_node) == 0 &&
            early_cpu_to_node(cpu) != NUMA_NO_NODE)
index bfe5ec2f4f83fe17de6334d2ee80b8a0d9743200,d4918d03efb4b7765bff35d3e5f28a8c3a2bc99d..bb8ee1ce696836667caa5700b4bcce6cb2ab5488
@@@ -9,7 -9,7 +9,7 @@@
   *  Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
   */
  
+ #include <linux/export.h>
  #include <linux/linkage.h>
  #include <linux/threads.h>
  #include <linux/init.h>
@@@ -22,7 -22,6 +22,6 @@@
  #include <asm/percpu.h>
  #include <asm/nops.h>
  #include "../entry/calling.h"
- #include <asm/export.h>
  #include <asm/nospec-branch.h>
  #include <asm/apicdef.h>
  #include <asm/fixmap.h>
@@@ -115,6 -114,28 +114,28 @@@ SYM_CODE_START_NOALIGN(startup_64
  
        /* Form the CR3 value being sure to include the CR3 modifier */
        addq    $(early_top_pgt - __START_KERNEL_map), %rax
+ #ifdef CONFIG_AMD_MEM_ENCRYPT
+       mov     %rax, %rdi
+       mov     %rax, %r14
+       addq    phys_base(%rip), %rdi
+       /*
+        * For SEV guests: Verify that the C-bit is correct. A malicious
+        * hypervisor could lie about the C-bit position to perform a ROP
+        * attack on the guest by writing to the unencrypted stack and wait for
+        * the next RET instruction.
+        */
+       call    sev_verify_cbit
+       /*
+        * Restore CR3 value without the phys_base which will be added
+        * below, before writing %cr3.
+        */
+        mov    %r14, %rax
+ #endif
        jmp 1f
  SYM_CODE_END(startup_64)
  
@@@ -180,10 -201,10 +201,10 @@@ SYM_INNER_LABEL(secondary_startup_64_no
        movl    $0, %ecx
  #endif
  
-       /* Enable PAE mode, PGE and LA57 */
-       orl     $(X86_CR4_PAE | X86_CR4_PGE), %ecx
+       /* Enable PAE mode, PSE, PGE and LA57 */
+       orl     $(X86_CR4_PAE | X86_CR4_PSE | X86_CR4_PGE), %ecx
  #ifdef CONFIG_X86_5LEVEL
-       testl   $1, __pgtable_l5_enabled(%rip)
+       testb   $1, __pgtable_l5_enabled(%rip)
        jz      1f
        orl     $X86_CR4_LA57, %ecx
  1:
        /* Setup early boot stage 4-/5-level pagetables. */
        addq    phys_base(%rip), %rax
  
-       /*
-        * For SEV guests: Verify that the C-bit is correct. A malicious
-        * hypervisor could lie about the C-bit position to perform a ROP
-        * attack on the guest by writing to the unencrypted stack and wait for
-        * the next RET instruction.
-        */
-       movq    %rax, %rdi
-       call    sev_verify_cbit
        /*
         * Switch to new page-table
         *
         * For the boot CPU this switches to early_top_pgt which still has the
-        * indentity mappings present. The secondary CPUs will switch to the
+        * identity mappings present. The secondary CPUs will switch to the
         * init_top_pgt here, away from the trampoline_pgd and unmap the
-        * indentity mapped ranges.
+        * identity mapped ranges.
         */
        movq    %rax, %cr3
  
        testl   $X2APIC_ENABLE, %eax
        jnz     .Lread_apicid_msr
  
+ #ifdef CONFIG_X86_X2APIC
+       /*
+        * If the system is in X2APIC mode then the MMIO base might not be
+        * mapped, causing the MMIO read below to fault. Faults can't
+        * be handled at that point.
+        */
+       cmpl    $0, x2apic_mode(%rip)
+       jz      .Lread_apicid_mmio
+       /* Force the AP into X2APIC mode. */
+       orl     $X2APIC_ENABLE, %eax
+       wrmsr
+       jmp     .Lread_apicid_msr
+ #endif
+ .Lread_apicid_mmio:
        /* Read the APIC ID from the fix-mapped MMIO space. */
        movq    apic_mmio_base(%rip), %rcx
        addq    $APIC_ID, %rcx
@@@ -449,7 -477,7 +477,7 @@@ SYM_CODE_START(soft_restart_cpu
        UNWIND_HINT_END_OF_STACK
  
        /* Find the idle task stack */
 -      movq    PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx
 +      movq    PER_CPU_VAR(pcpu_hot + X86_current_task), %rcx
        movq    TASK_threadsp(%rcx), %rsp
  
        jmp     .Ljump_to_C_code
diff --combined arch/x86/kernel/smp.c
index 1bb79526c21793a9bf78afa02b5ca62b7444f171,96a771f9f930a6aba1b77967169808bce3b3eace..2908e063d7d830db32decbfefbd017fd05292700
@@@ -131,7 -131,7 +131,7 @@@ static int smp_stop_nmi_callback(unsign
  }
  
  /*
-  * Disable virtualization, APIC etc. and park the CPU in a HLT loop
+  * This function calls the 'stop' function on all other CPUs in the system.
   */
  DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
  {
@@@ -148,16 -148,14 +148,16 @@@ static int register_stop_handler(void
  
  static void native_stop_other_cpus(int wait)
  {
 -      unsigned int cpu = smp_processor_id();
 +      unsigned int old_cpu, this_cpu;
        unsigned long flags, timeout;
  
        if (reboot_force)
                return;
  
        /* Only proceed if this is the first CPU to reach this code */
 -      if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1)
 +      old_cpu = -1;
 +      this_cpu = smp_processor_id();
 +      if (!atomic_try_cmpxchg(&stopping_cpu, &old_cpu, this_cpu))
                return;
  
        /* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */
         * 2) Wait for all other CPUs to report that they reached the
         *    HLT loop in stop_this_cpu()
         *
-        * 3) If the system uses INIT/STARTUP for CPU bringup, then
-        *    send all present CPUs an INIT vector, which brings them
-        *    completely out of the way.
+        * 3) If #2 timed out, send an NMI to the CPUs which did not
+        *    yet report
         *
-        * 4) If #3 is not possible and #2 timed out send an NMI to the
-        *    CPUs which did not yet report
-        *
-        * 5) Wait for all other CPUs to report that they reached the
+        * 4) Wait for all other CPUs to report that they reached the
         *    HLT loop in stop_this_cpu()
         *
-        * #4 can obviously race against a CPU reaching the HLT loop late.
+        * #3 can obviously race against a CPU reaching the HLT loop late.
         * That CPU will have reported already and the "have all CPUs
         * reached HLT" condition will be true despite the fact that the
         * other CPU is still handling the NMI. Again, there is no
         * NMIs.
         */
        cpumask_copy(&cpus_stop_mask, cpu_online_mask);
 -      cpumask_clear_cpu(cpu, &cpus_stop_mask);
 +      cpumask_clear_cpu(this_cpu, &cpus_stop_mask);
  
        if (!cpumask_empty(&cpus_stop_mask)) {
                apic_send_IPI_allbutself(REBOOT_VECTOR);
                /*
                 * Don't wait longer than a second for IPI completion. The
                 * wait request is not checked here because that would
-                * prevent an NMI/INIT shutdown in case that not all
+                * prevent an NMI shutdown attempt in case that not all
                 * CPUs reach shutdown state.
                 */
                timeout = USEC_PER_SEC;
                        udelay(1);
        }
  
-       /*
-        * Park all other CPUs in INIT including "offline" CPUs, if
-        * possible. That's a safe place where they can't resume execution
-        * of HLT and then execute the HLT loop from overwritten text or
-        * page tables.
-        *
-        * The only downside is a broadcast MCE, but up to the point where
-        * the kexec() kernel brought all APs online again an MCE will just
-        * make HLT resume and handle the MCE. The machine crashes and burns
-        * due to overwritten text, page tables and data. So there is a
-        * choice between fire and frying pan. The result is pretty much
-        * the same. Chose frying pan until x86 provides a sane mechanism
-        * to park a CPU.
-        */
-       if (smp_park_other_cpus_in_init())
-               goto done;
-       /*
-        * If park with INIT was not possible and the REBOOT_VECTOR didn't
-        * take all secondary CPUs offline, try with the NMI.
-        */
+       /* If the REBOOT_VECTOR didn't work, try with the NMI */
        if (!cpumask_empty(&cpus_stop_mask)) {
                /*
                 * If NMI IPI is enabled, try to register the stop handler
                 * CPUs to stop.
                 */
                if (!smp_no_nmi_ipi && !register_stop_handler()) {
 +                      unsigned int cpu;
 +
                        pr_emerg("Shutting down cpus with NMI\n");
  
                        for_each_cpu(cpu, &cpus_stop_mask)
                        udelay(1);
        }
  
- done:
        local_irq_save(flags);
        disable_local_APIC();
        mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
diff --combined arch/x86/kernel/traps.c
index 78b1d1a6ed2cc5488cad09f00da8b57d96e7f30f,c3b2f863acf0f3f28c7402c86de8cbaa47eb930c..4b256de7c58ac983b9f521122529b18e3d50ed09
@@@ -37,6 -37,7 +37,7 @@@
  #include <linux/nmi.h>
  #include <linux/mm.h>
  #include <linux/smp.h>
+ #include <linux/cpu.h>
  #include <linux/io.h>
  #include <linux/hardirq.h>
  #include <linux/atomic.h>
@@@ -565,7 -566,7 +566,7 @@@ static bool fixup_iopl_exception(struc
   */
  static bool try_fixup_enqcmd_gp(void)
  {
- #ifdef CONFIG_IOMMU_SVA
+ #ifdef CONFIG_ARCH_HAS_CPU_PASID
        u32 pasid;
  
        /*
        if (!mm_valid_pasid(current->mm))
                return false;
  
-       pasid = current->mm->pasid;
+       pasid = mm_get_enqcmd_pasid(current->mm);
  
        /*
         * Did this thread already have its PASID activated?
@@@ -772,7 -773,7 +773,7 @@@ DEFINE_IDTENTRY_RAW(exc_int3
   */
  asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
  {
 -      struct pt_regs *regs = (struct pt_regs *)this_cpu_read(pcpu_hot.top_of_stack) - 1;
 +      struct pt_regs *regs = (struct pt_regs *)current_top_of_stack() - 1;
        if (regs != eregs)
                *regs = *eregs;
        return regs;
@@@ -790,7 -791,7 +791,7 @@@ asmlinkage __visible noinstr struct pt_
         * trust it and switch to the current kernel stack
         */
        if (ip_within_syscall_gap(regs)) {
 -              sp = this_cpu_read(pcpu_hot.top_of_stack);
 +              sp = current_top_of_stack();
                goto sync;
        }
  
index e701f2dcea29594d127a0b0e8496ba6c34bf3b6d,a349dbfc6d5ab47b2f8963bacd24915a963cb2a2..9be175c8ac975b46c61ebb75a47160fb6bf1deb1
@@@ -46,7 -46,6 +46,7 @@@ ENTRY(phys_startup_64
  #endif
  
  jiffies = jiffies_64;
 +const_pcpu_hot = pcpu_hot;
  
  #if defined(CONFIG_X86_64)
  /*
@@@ -140,10 -139,7 +140,7 @@@ SECTION
                STATIC_CALL_TEXT
  
                ALIGN_ENTRY_TEXT_BEGIN
- #ifdef CONFIG_CPU_SRSO
                *(.text..__x86.rethunk_untrain)
- #endif
                ENTRY_TEXT
  
  #ifdef CONFIG_CPU_SRSO
                ALIGN_ENTRY_TEXT_END
                *(.gnu.warning)
  
-       } :text =0xcccc
+       } :text = 0xcccccccc
  
        /* End of text section, which should occupy whole number of pages */
        _etext = .;
        }
  #endif
  
-       /*
-        * start address and size of operations which during runtime
-        * can be patched with virtualization friendly instructions or
-        * baremetal native ones. Think page table operations.
-        * Details in paravirt_types.h
-        */
-       . = ALIGN(8);
-       .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
-               __parainstructions = .;
-               *(.parainstructions)
-               __parainstructions_end = .;
-       }
  #ifdef CONFIG_RETPOLINE
        /*
         * List of instructions that call/jmp/jcc to retpoline thunks
@@@ -521,12 -504,12 +505,12 @@@ INIT_PER_CPU(irq_stack_backing_store)
             "fixed_percpu_data is not at start of per-cpu area");
  #endif
  
- #ifdef CONFIG_RETHUNK
+ #ifdef CONFIG_CPU_UNRET_ENTRY
  . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
- . = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
  #endif
  
  #ifdef CONFIG_CPU_SRSO
+ . = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
  /*
   * GNU ld cannot do XOR until 2.41.
   * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1
index 8632d7dd1f004768de7ce2ec18657fb843506162,873e4ef23e49578989634629b728d4c61bcd96b4..1c96be769adc3e98c87d63dc623ed21cf770129b
@@@ -1,7 -1,7 +1,7 @@@
  /* SPDX-License-Identifier: GPL-2.0-only */
  
+ #include <linux/export.h>
  #include <linux/linkage.h>
- #include <asm/export.h>
  #include <asm/percpu.h>
  #include <asm/processor-flags.h>
  
@@@ -24,12 -24,12 +24,12 @@@ SYM_FUNC_START(cmpxchg8b_emu
        pushfl
        cli
  
 -      cmpl    0(%esi), %eax
 +      cmpl    (%esi), %eax
        jne     .Lnot_same
        cmpl    4(%esi), %edx
        jne     .Lnot_same
  
 -      movl    %ebx, 0(%esi)
 +      movl    %ebx, (%esi)
        movl    %ecx, 4(%esi)
  
        orl     $X86_EFLAGS_ZF, (%esp)
@@@ -38,7 -38,7 +38,7 @@@
        RET
  
  .Lnot_same:
 -      movl    0(%esi), %eax
 +      movl    (%esi), %eax
        movl    4(%esi), %edx
  
        andl    $(~X86_EFLAGS_ZF), (%esp)
@@@ -53,30 -53,18 +53,30 @@@ EXPORT_SYMBOL(cmpxchg8b_emu
  
  #ifndef CONFIG_UML
  
 +/*
 + * Emulate 'cmpxchg8b %fs:(%esi)'
 + *
 + * Inputs:
 + * %esi : memory location to compare
 + * %eax : low 32 bits of old value
 + * %edx : high 32 bits of old value
 + * %ebx : low 32 bits of new value
 + * %ecx : high 32 bits of new value
 + *
 + * Notably this is not LOCK prefixed and is not safe against NMIs
 + */
  SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
  
        pushfl
        cli
  
 -      cmpl    PER_CPU_VAR(0(%esi)), %eax
 +      cmpl    __percpu (%esi), %eax
        jne     .Lnot_same2
 -      cmpl    PER_CPU_VAR(4(%esi)), %edx
 +      cmpl    __percpu 4(%esi), %edx
        jne     .Lnot_same2
  
 -      movl    %ebx, PER_CPU_VAR(0(%esi))
 -      movl    %ecx, PER_CPU_VAR(4(%esi))
 +      movl    %ebx, __percpu (%esi)
 +      movl    %ecx, __percpu 4(%esi)
  
        orl     $X86_EFLAGS_ZF, (%esp)
  
@@@ -84,8 -72,8 +84,8 @@@
        RET
  
  .Lnot_same2:
 -      movl    PER_CPU_VAR(0(%esi)), %eax
 -      movl    PER_CPU_VAR(4(%esi)), %edx
 +      movl    __percpu (%esi), %eax
 +      movl    __percpu 4(%esi), %edx
  
        andl    $(~X86_EFLAGS_ZF), (%esp)
  
diff --combined arch/x86/xen/xen-asm.S
index 448958ddbaf8773ac37b5c5b38a3619b1b9a48d3,1a9cd18dfbd31208e5d1bcfa53f4a6e90bc81cf6..83189cf5cdce9361c6a878f1e8ce86e285ad9ba1
@@@ -28,7 -28,7 +28,7 @@@
   * non-zero.
   */
  SYM_FUNC_START(xen_irq_disable_direct)
 -      movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 +      movb $1, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
        RET
  SYM_FUNC_END(xen_irq_disable_direct)
  
@@@ -69,7 -69,7 +69,7 @@@ SYM_FUNC_END(check_events
  SYM_FUNC_START(xen_irq_enable_direct)
        FRAME_BEGIN
        /* Unmask events */
 -      movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 +      movb $0, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
  
        /*
         * Preempt here doesn't matter because that will deal with any
@@@ -78,7 -78,7 +78,7 @@@
         */
  
        /* Test for pending */
 -      testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
 +      testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_pending)
        jz 1f
  
        call check_events
@@@ -97,7 -97,7 +97,7 @@@ SYM_FUNC_END(xen_irq_enable_direct
   * x86 use opposite senses (mask vs enable).
   */
  SYM_FUNC_START(xen_save_fl_direct)
 -      testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 +      testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
        setz %ah
        addb %ah, %ah
        RET
@@@ -113,7 -113,7 +113,7 @@@ SYM_FUNC_END(xen_read_cr2)
  
  SYM_FUNC_START(xen_read_cr2_direct)
        FRAME_BEGIN
 -      _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
 +      _ASM_MOV PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_arch_cr2), %_ASM_AX
        FRAME_END
        RET
  SYM_FUNC_END(xen_read_cr2_direct);
@@@ -156,7 -156,7 +156,7 @@@ xen_pv_trap asm_xenpv_exc_machine_chec
  #endif /* CONFIG_X86_MCE */
  xen_pv_trap asm_exc_simd_coprocessor_error
  #ifdef CONFIG_IA32_EMULATION
- xen_pv_trap entry_INT80_compat
+ xen_pv_trap asm_int80_emulation
  #endif
  xen_pv_trap asm_exc_xen_unknown_trap
  xen_pv_trap asm_exc_xen_hypervisor_callback
diff --combined include/linux/compiler.h
index bf9815eaf4aabf60df593c261de9be2ef72906c9,bb1339c7057b49c877907e9db968e828a28d0f3f..cdcdaa48b4d2d705f0aa59dfe3450f73bc76b180
@@@ -177,10 -177,7 +177,7 @@@ void ftrace_likely_update(struct ftrace
        __asm__ ("" : "=r" (var) : "0" (var))
  #endif
  
- /* Not-quite-unique ID. */
- #ifndef __UNIQUE_ID
- # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
- #endif
+ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
  
  /**
   * data_race - mark an expression as containing intentional data races
   */
  #define ___ADDRESSABLE(sym, __attrs) \
        static void * __used __attrs \
 -              __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym;
 +      __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)(uintptr_t)&sym;
  #define __ADDRESSABLE(sym) \
        ___ADDRESSABLE(sym, __section(".discard.addressable"))
  
@@@ -230,6 -227,14 +227,14 @@@ static inline void *offset_to_ptr(cons
  /* &a[0] degrades to a pointer: a different type from an array */
  #define __must_be_array(a)    BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
  
+ /*
+  * This returns a constant expression while determining if an argument is
+  * a constant expression, most importantly without evaluating the argument.
+  * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
+  */
+ #define __is_constexpr(x) \
+       (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
  /*
   * Whether 'type' is a signed type or an unsigned type. Supports scalar types,
   * bool and also pointer types.