x86/boot/64: Determine VA/PA offset before entering C code
author Ard Biesheuvel <ardb@kernel.org>
Thu, 5 Dec 2024 11:28:07 +0000 (12:28 +0100)
committer Ingo Molnar <mingo@kernel.org>
Thu, 5 Dec 2024 12:18:54 +0000 (13:18 +0100)
Implicit absolute symbol references (e.g., taking the address of a
global variable) must be avoided in the C code that runs from the early
1:1 mapping of the kernel, given that this is a practice that violates
assumptions on the part of the toolchain. I.e., RIP-relative and
absolute references are expected to produce the same values, and so the
compiler is free to choose either. However, the code currently assumes
that RIP-relative references are never emitted here.

So an explicit virtual-to-physical offset needs to be used to derive the
kernel virtual addresses of _text and _end, instead of simply taking the
addresses and assuming that the compiler will not choose to use a
RIP-relative reference in this particular case.

Currently, phys_base is already used to perform such calculations, but
it is derived from the kernel virtual address of _text, which is taken
using an implicit absolute symbol reference. So instead, derive this
VA-to-PA offset in asm code, and pass it to the C startup code.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lore.kernel.org/r/20241205112804.3416920-11-ardb+git@google.com
arch/x86/include/asm/setup.h
arch/x86/kernel/head64.c
arch/x86/kernel/head_64.S

index 0667b2a88614c6e4d71252426747d8b0cb627867..85f4fde3515c4338f68e4a053bfbaa7ed2db0e0e 100644 (file)
@@ -49,7 +49,7 @@ extern unsigned long saved_video_mode;
 
 extern void reserve_standard_io_resources(void);
 extern void i386_reserve_resources(void);
-extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
+extern unsigned long __startup_64(unsigned long p2v_offset, struct boot_params *bp);
 extern void startup_64_setup_gdt_idt(void);
 extern void early_setup_idt(void);
 extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
index 4b9d4557fc94a46defab68cba0e56b2776148552..a7cd4053eeb3291d1b0b43404299c1d04bde02af 100644 (file)
@@ -138,12 +138,14 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdv
  * doesn't have to generate PC-relative relocations when accessing globals from
  * that function. Clang actually does not generate them, which leads to
  * boot-time crashes. To work around this problem, every global pointer must
- * be accessed using RIP_REL_REF().
+ * be accessed using RIP_REL_REF(). Kernel virtual addresses can be determined
+ * by subtracting p2v_offset from the RIP-relative address.
  */
-unsigned long __head __startup_64(unsigned long physaddr,
+unsigned long __head __startup_64(unsigned long p2v_offset,
                                  struct boot_params *bp)
 {
        pmd_t (*early_pgts)[PTRS_PER_PMD] = RIP_REL_REF(early_dynamic_pgts);
+       unsigned long physaddr = (unsigned long)&RIP_REL_REF(_text);
        unsigned long pgtable_flags;
        unsigned long load_delta;
        pgdval_t *pgd;
@@ -163,7 +165,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
         * Compute the delta between the address I am compiled to run at
         * and the address I am actually running at.
         */
-       load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);
+       load_delta = __START_KERNEL_map + p2v_offset;
        RIP_REL_REF(phys_base) = load_delta;
 
        /* Is the address not 2M aligned? */
index 56163e2124cf3aa6018bc249813a44045551c035..31345e0ba0064e23056a659ded3c3259a736412d 100644 (file)
@@ -94,13 +94,19 @@ SYM_CODE_START_NOALIGN(startup_64)
        /* Sanitize CPU configuration */
        call verify_cpu
 
+       /*
+        * Derive the kernel's physical-to-virtual offset from the physical and
+        * virtual addresses of common_startup_64().
+        */
+       leaq    common_startup_64(%rip), %rdi
+       subq    .Lcommon_startup_64(%rip), %rdi
+
        /*
         * Perform pagetable fixups. Additionally, if SME is active, encrypt
         * the kernel and retrieve the modifier (SME encryption mask if SME
         * is active) to be added to the initial pgdir entry that will be
         * programmed into CR3.
         */
-       leaq    _text(%rip), %rdi
        movq    %r15, %rsi
        call    __startup_64
 
@@ -128,11 +134,11 @@ SYM_CODE_START_NOALIGN(startup_64)
 
        /* Branch to the common startup code at its kernel virtual address */
        ANNOTATE_RETPOLINE_SAFE
-       jmp     *0f(%rip)
+       jmp     *.Lcommon_startup_64(%rip)
 SYM_CODE_END(startup_64)
 
        __INITRODATA
-0:     .quad   common_startup_64
+SYM_DATA_LOCAL(.Lcommon_startup_64, .quad common_startup_64)
 
        .text
 SYM_CODE_START(secondary_startup_64)