Commit | Line | Data |
---|---|---|
b886d83c | 1 | // SPDX-License-Identifier: GPL-2.0-only |
a2a47c6c KS |
2 | /* |
3 | * HyperV Detection code. | |
4 | * | |
5 | * Copyright (C) 2010, Novell, Inc. | |
6 | * Author : K. Y. Srinivasan <ksrinivasan@novell.com> | |
a2a47c6c KS |
7 | */ |
8 | ||
9 | #include <linux/types.h> | |
6f4151c8 S |
10 | #include <linux/time.h> |
11 | #include <linux/clocksource.h> | |
186f4360 PG |
12 | #include <linux/init.h> |
13 | #include <linux/export.h> | |
bc2b0331 | 14 | #include <linux/hardirq.h> |
9e7827b5 | 15 | #include <linux/efi.h> |
bc2b0331 | 16 | #include <linux/interrupt.h> |
1aec1696 | 17 | #include <linux/irq.h> |
2517281d | 18 | #include <linux/kexec.h> |
fd1fea68 | 19 | #include <linux/random.h> |
a2a47c6c | 20 | #include <asm/processor.h> |
e08cae41 | 21 | #include <asm/hypervisor.h> |
ef5a3c92 | 22 | #include <hyperv/hvhdk.h> |
a2a47c6c | 23 | #include <asm/mshyperv.h> |
bc2b0331 | 24 | #include <asm/desc.h> |
a16be368 | 25 | #include <asm/idtentry.h> |
bc2b0331 | 26 | #include <asm/irq_regs.h> |
9e7827b5 | 27 | #include <asm/i8259.h> |
d68ce017 | 28 | #include <asm/apic.h> |
ca3ba2a2 | 29 | #include <asm/timer.h> |
2517281d | 30 | #include <asm/reboot.h> |
59107e2f | 31 | #include <asm/nmi.h> |
bd00cd52 | 32 | #include <clocksource/hyperv_timer.h> |
efef7f18 | 33 | #include <asm/msr.h> |
333abaf5 | 34 | #include <asm/numa.h> |
4754ec7f | 35 | #include <asm/svm.h> |
a2a47c6c | 36 | |
c4bdf94f JJ |
37 | /* Is Linux running on nested Microsoft Hypervisor */ |
38 | bool hv_nested; | |
e08cae41 | 39 | struct ms_hyperv_info ms_hyperv; |
a2a47c6c | 40 | |
d3a9d7e4 DC |
41 | /* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */ |
42 | bool hyperv_paravisor_present __ro_after_init; | |
43 | EXPORT_SYMBOL_GPL(hyperv_paravisor_present); | |
44 | ||
3c433679 | 45 | #if IS_ENABLED(CONFIG_HYPERV) |
0e3f7d12 | 46 | static inline unsigned int hv_get_nested_msr(unsigned int reg) |
7fec185a | 47 | { |
0e3f7d12 NDN |
48 | if (hv_is_sint_msr(reg)) |
49 | return reg - HV_X64_MSR_SINT0 + HV_X64_MSR_NESTED_SINT0; | |
b14033a3 | 50 | |
7fec185a | 51 | switch (reg) { |
0e3f7d12 NDN |
52 | case HV_X64_MSR_SIMP: |
53 | return HV_X64_MSR_NESTED_SIMP; | |
54 | case HV_X64_MSR_SIEFP: | |
55 | return HV_X64_MSR_NESTED_SIEFP; | |
56 | case HV_X64_MSR_SVERSION: | |
57 | return HV_X64_MSR_NESTED_SVERSION; | |
58 | case HV_X64_MSR_SCONTROL: | |
59 | return HV_X64_MSR_NESTED_SCONTROL; | |
60 | case HV_X64_MSR_EOM: | |
61 | return HV_X64_MSR_NESTED_EOM; | |
7fec185a JJ |
62 | default: |
63 | return reg; | |
64 | } | |
65 | } | |
66 | ||
0e3f7d12 | 67 | u64 hv_get_non_nested_msr(unsigned int reg) |
7fec185a JJ |
68 | { |
69 | u64 value; | |
70 | ||
0e3f7d12 | 71 | if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) |
b9b4fe3a | 72 | hv_ivm_msr_read(reg, &value); |
7fec185a | 73 | else |
c435e608 | 74 | rdmsrq(reg, value); |
7fec185a JJ |
75 | return value; |
76 | } | |
0e3f7d12 | 77 | EXPORT_SYMBOL_GPL(hv_get_non_nested_msr); |
7fec185a | 78 | |
0e3f7d12 | 79 | void hv_set_non_nested_msr(unsigned int reg, u64 value) |
7fec185a | 80 | { |
0e3f7d12 | 81 | if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) { |
b9b4fe3a | 82 | hv_ivm_msr_write(reg, value); |
7fec185a JJ |
83 | |
84 | /* Write proxy bit via wrmsl instruction */ | |
0e3f7d12 | 85 | if (hv_is_sint_msr(reg)) |
78255eb2 | 86 | wrmsrq(reg, value | 1 << 20); |
7fec185a | 87 | } else { |
78255eb2 | 88 | wrmsrq(reg, value); |
7fec185a JJ |
89 | } |
90 | } | |
0e3f7d12 | 91 | EXPORT_SYMBOL_GPL(hv_set_non_nested_msr); |
7fec185a | 92 | |
0e3f7d12 | 93 | u64 hv_get_msr(unsigned int reg) |
7fec185a JJ |
94 | { |
95 | if (hv_nested) | |
0e3f7d12 | 96 | reg = hv_get_nested_msr(reg); |
7fec185a | 97 | |
0e3f7d12 | 98 | return hv_get_non_nested_msr(reg); |
7fec185a | 99 | } |
0e3f7d12 | 100 | EXPORT_SYMBOL_GPL(hv_get_msr); |
7fec185a | 101 | |
0e3f7d12 | 102 | void hv_set_msr(unsigned int reg, u64 value) |
7fec185a JJ |
103 | { |
104 | if (hv_nested) | |
0e3f7d12 | 105 | reg = hv_get_nested_msr(reg); |
7fec185a | 106 | |
0e3f7d12 | 107 | hv_set_non_nested_msr(reg, value); |
7fec185a | 108 | } |
0e3f7d12 | 109 | EXPORT_SYMBOL_GPL(hv_set_msr); |
7fec185a | 110 | |
/*
 * Optional callbacks installed through the hv_setup_*_handler() helpers.
 * A NULL pointer means no handler is registered.
 */
static void (*mshv_handler)(void);		/* hypervisor callback vector */
static void (*vmbus_handler)(void);		/* hypervisor callback vector */
static void (*hv_stimer0_handler)(void);	/* stimer0 vector */
static void (*hv_kexec_handler)(void);		/* kexec shutdown path */
static void (*hv_crash_handler)(struct pt_regs *regs);	/* crash shutdown path */
1aec1696 | 116 | |
a16be368 | 117 | DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback) |
1aec1696 TG |
118 | { |
119 | struct pt_regs *old_regs = set_irq_regs(regs); | |
120 | ||
1aec1696 | 121 | inc_irq_stat(irq_hv_callback_count); |
e2575ffe NDN |
122 | if (mshv_handler) |
123 | mshv_handler(); | |
124 | ||
1aec1696 TG |
125 | if (vmbus_handler) |
126 | vmbus_handler(); | |
127 | ||
7dc9b6b8 | 128 | if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED) |
670c04ad | 129 | apic_eoi(); |
a33fd4c2 | 130 | |
1aec1696 TG |
131 | set_irq_regs(old_regs); |
132 | } | |
133 | ||
e2575ffe NDN |
134 | void hv_setup_mshv_handler(void (*handler)(void)) |
135 | { | |
136 | mshv_handler = handler; | |
137 | } | |
138 | ||
d608715d | 139 | void hv_setup_vmbus_handler(void (*handler)(void)) |
1aec1696 TG |
140 | { |
141 | vmbus_handler = handler; | |
1aec1696 TG |
142 | } |
143 | ||
d608715d | 144 | void hv_remove_vmbus_handler(void) |
1aec1696 TG |
145 | { |
146 | /* We have no way to deallocate the interrupt gate */ | |
147 | vmbus_handler = NULL; | |
148 | } | |
2517281d | 149 | |
248e742a MK |
150 | /* |
151 | * Routines to do per-architecture handling of stimer0 | |
152 | * interrupts when in Direct Mode | |
153 | */ | |
a16be368 | 154 | DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) |
248e742a MK |
155 | { |
156 | struct pt_regs *old_regs = set_irq_regs(regs); | |
157 | ||
248e742a MK |
158 | inc_irq_stat(hyperv_stimer0_count); |
159 | if (hv_stimer0_handler) | |
160 | hv_stimer0_handler(); | |
703f7066 | 161 | add_interrupt_randomness(HYPERV_STIMER0_VECTOR); |
670c04ad | 162 | apic_eoi(); |
248e742a | 163 | |
248e742a MK |
164 | set_irq_regs(old_regs); |
165 | } | |
166 | ||
ec866be6 MK |
167 | /* For x86/x64, override weak placeholders in hyperv_timer.c */ |
168 | void hv_setup_stimer0_handler(void (*handler)(void)) | |
248e742a | 169 | { |
248e742a | 170 | hv_stimer0_handler = handler; |
248e742a | 171 | } |
248e742a | 172 | |
ec866be6 | 173 | void hv_remove_stimer0_handler(void) |
248e742a MK |
174 | { |
175 | /* We have no way to deallocate the interrupt gate */ | |
176 | hv_stimer0_handler = NULL; | |
177 | } | |
248e742a | 178 | |
2517281d VK |
179 | void hv_setup_kexec_handler(void (*handler)(void)) |
180 | { | |
181 | hv_kexec_handler = handler; | |
182 | } | |
2517281d VK |
183 | |
184 | void hv_remove_kexec_handler(void) | |
185 | { | |
186 | hv_kexec_handler = NULL; | |
187 | } | |
b4370df2 VK |
188 | |
189 | void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)) | |
190 | { | |
191 | hv_crash_handler = handler; | |
192 | } | |
b4370df2 VK |
193 | |
194 | void hv_remove_crash_handler(void) | |
195 | { | |
196 | hv_crash_handler = NULL; | |
197 | } | |
1aec1696 | 198 | |
1e034743 | 199 | #ifdef CONFIG_KEXEC_CORE |
2517281d VK |
200 | static void hv_machine_shutdown(void) |
201 | { | |
202 | if (kexec_in_progress && hv_kexec_handler) | |
203 | hv_kexec_handler(); | |
dfe94d40 DC |
204 | |
205 | /* | |
206 | * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor | |
207 | * corrupts the old VP Assist Pages and can crash the kexec kernel. | |
208 | */ | |
b9af6418 ARM |
209 | if (kexec_in_progress) |
210 | cpuhp_remove_state(CPUHP_AP_HYPERV_ONLINE); | |
dfe94d40 DC |
211 | |
212 | /* The function calls stop_other_cpus(). */ | |
2517281d | 213 | native_machine_shutdown(); |
dfe94d40 DC |
214 | |
215 | /* Disable the hypercall page when there is only 1 active CPU. */ | |
216 | if (kexec_in_progress) | |
217 | hyperv_cleanup(); | |
2517281d | 218 | } |
a4eeb217 | 219 | #endif /* CONFIG_KEXEC_CORE */ |
2517281d | 220 | |
a4eeb217 | 221 | #ifdef CONFIG_CRASH_DUMP |
b4370df2 VK |
222 | static void hv_machine_crash_shutdown(struct pt_regs *regs) |
223 | { | |
224 | if (hv_crash_handler) | |
225 | hv_crash_handler(regs); | |
dfe94d40 DC |
226 | |
227 | /* The function calls crash_smp_send_stop(). */ | |
b4370df2 | 228 | native_machine_crash_shutdown(regs); |
dfe94d40 DC |
229 | |
230 | /* Disable the hypercall page when there is only 1 active CPU. */ | |
231 | hyperv_cleanup(); | |
b4370df2 | 232 | } |
a4eeb217 | 233 | #endif /* CONFIG_CRASH_DUMP */ |
bcc80dec NJ |
234 | |
235 | static u64 hv_ref_counter_at_suspend; | |
236 | static void (*old_save_sched_clock_state)(void); | |
237 | static void (*old_restore_sched_clock_state)(void); | |
238 | ||
239 | /* | |
240 | * Hyper-V clock counter resets during hibernation. Save and restore clock | |
241 | * offset during suspend/resume, while also considering the time passed | |
242 | * before suspend. This is to make sure that sched_clock using hv tsc page | |
243 | * based clocksource, proceeds from where it left off during suspend and | |
244 | * it shows correct time for the timestamps of kernel messages after resume. | |
245 | */ | |
246 | static void save_hv_clock_tsc_state(void) | |
247 | { | |
248 | hv_ref_counter_at_suspend = hv_read_reference_counter(); | |
249 | } | |
250 | ||
251 | static void restore_hv_clock_tsc_state(void) | |
252 | { | |
253 | /* | |
254 | * Adjust the offsets used by hv tsc clocksource to | |
255 | * account for the time spent before hibernation. | |
256 | * adjusted value = reference counter (time) at suspend | |
257 | * - reference counter (time) now. | |
258 | */ | |
259 | hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter()); | |
260 | } | |
261 | ||
262 | /* | |
263 | * Functions to override save_sched_clock_state and restore_sched_clock_state | |
264 | * functions of x86_platform. The Hyper-V clock counter is reset during | |
265 | * suspend-resume and the offset used to measure time needs to be | |
266 | * corrected, post resume. | |
267 | */ | |
/* Chain to the previous save hook, then record the Hyper-V reference counter. */
static void hv_save_sched_clock_state(void)
{
	old_save_sched_clock_state();

	save_hv_clock_tsc_state();
}
273 | ||
/* Fix up the Hyper-V clock offset, then chain to the previous restore hook. */
static void hv_restore_sched_clock_state(void)
{
	restore_hv_clock_tsc_state();

	old_restore_sched_clock_state();
}
279 | ||
280 | static void __init x86_setup_ops_for_tsc_pg_clock(void) | |
281 | { | |
282 | if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) | |
283 | return; | |
284 | ||
285 | old_save_sched_clock_state = x86_platform.save_sched_clock_state; | |
286 | x86_platform.save_sched_clock_state = hv_save_sched_clock_state; | |
287 | ||
288 | old_restore_sched_clock_state = x86_platform.restore_sched_clock_state; | |
289 | x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state; | |
290 | } | |
1e034743 | 291 | #endif /* CONFIG_HYPERV */ |
b4370df2 | 292 | |
9df56f19 | 293 | static uint32_t __init ms_hyperv_platform(void) |
a2a47c6c | 294 | { |
e08cae41 PA |
295 | u32 eax; |
296 | u32 hyp_signature[3]; | |
a2a47c6c | 297 | |
e08cae41 | 298 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) |
9df56f19 | 299 | return 0; |
a2a47c6c | 300 | |
e08cae41 PA |
301 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, |
302 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); | |
a2a47c6c | 303 | |
f3e613e7 SC |
304 | if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX || |
305 | memcmp("Microsoft Hv", hyp_signature, 12)) | |
306 | return 0; | |
9df56f19 | 307 | |
f3e613e7 SC |
308 | /* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */ |
309 | eax = cpuid_eax(HYPERV_CPUID_FEATURES); | |
310 | if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) { | |
311 | pr_warn("x86/hyperv: HYPERCALL MSR not available.\n"); | |
312 | return 0; | |
313 | } | |
314 | if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) { | |
315 | pr_warn("x86/hyperv: VP_INDEX MSR not available.\n"); | |
316 | return 0; | |
317 | } | |
318 | ||
319 | return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS; | |
a2a47c6c KS |
320 | } |
321 | ||
59107e2f VK |
322 | #ifdef CONFIG_X86_LOCAL_APIC |
323 | /* | |
324 | * Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes | |
d9f6e12f | 325 | * it difficult to process CHANNELMSG_UNLOAD in case of crash. Handle |
59107e2f VK |
326 | * unknown NMI on the first CPU which gets it. |
327 | */ | |
328 | static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs) | |
329 | { | |
330 | static atomic_t nmi_cpu = ATOMIC_INIT(-1); | |
18286883 | 331 | unsigned int old_cpu, this_cpu; |
59107e2f VK |
332 | |
333 | if (!unknown_nmi_panic) | |
334 | return NMI_DONE; | |
335 | ||
18286883 UB |
336 | old_cpu = -1; |
337 | this_cpu = raw_smp_processor_id(); | |
338 | if (!atomic_try_cmpxchg(&nmi_cpu, &old_cpu, this_cpu)) | |
59107e2f VK |
339 | return NMI_HANDLED; |
340 | ||
341 | return NMI_DONE; | |
342 | } | |
343 | #endif | |
344 | ||
71c2a2d0 VK |
345 | static unsigned long hv_get_tsc_khz(void) |
346 | { | |
347 | unsigned long freq; | |
348 | ||
c435e608 | 349 | rdmsrq(HV_X64_MSR_TSC_FREQUENCY, freq); |
71c2a2d0 VK |
350 | |
351 | return freq / 1000; | |
352 | } | |
353 | ||
3a025de6 YS |
354 | #if defined(CONFIG_SMP) && IS_ENABLED(CONFIG_HYPERV) |
355 | static void __init hv_smp_prepare_boot_cpu(void) | |
356 | { | |
357 | native_smp_prepare_boot_cpu(); | |
358 | #if defined(CONFIG_X86_64) && defined(CONFIG_PARAVIRT_SPINLOCKS) | |
359 | hv_init_spinlocks(); | |
360 | #endif | |
361 | } | |
333abaf5 WL |
362 | |
363 | static void __init hv_smp_prepare_cpus(unsigned int max_cpus) | |
364 | { | |
365 | #ifdef CONFIG_X86_64 | |
366 | int i; | |
367 | int ret; | |
368 | #endif | |
369 | ||
370 | native_smp_prepare_cpus(max_cpus); | |
371 | ||
44676bb9 TL |
372 | /* |
373 | * Override wakeup_secondary_cpu_64 callback for SEV-SNP | |
374 | * enlightened guest. | |
375 | */ | |
e3131f1c | 376 | if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) { |
44676bb9 TL |
377 | apic->wakeup_secondary_cpu_64 = hv_snp_boot_ap; |
378 | return; | |
379 | } | |
380 | ||
333abaf5 WL |
381 | #ifdef CONFIG_X86_64 |
382 | for_each_present_cpu(i) { | |
383 | if (i == 0) | |
384 | continue; | |
f5a11c69 | 385 | ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i)); |
333abaf5 WL |
386 | BUG_ON(ret); |
387 | } | |
388 | ||
389 | for_each_present_cpu(i) { | |
390 | if (i == 0) | |
391 | continue; | |
392 | ret = hv_call_create_vp(numa_cpu_node(i), hv_current_partition_id, i, i); | |
393 | BUG_ON(ret); | |
394 | } | |
395 | #endif | |
396 | } | |
3a025de6 YS |
397 | #endif |
398 | ||
0719881b DC |
399 | /* |
400 | * When a fully enlightened TDX VM runs on Hyper-V, the firmware sets the | |
401 | * HW_REDUCED flag: refer to acpi_tb_create_local_fadt(). Consequently ttyS0 | |
402 | * interrupts can't work because request_irq() -> ... -> irq_to_desc() returns | |
403 | * NULL for ttyS0. This happens because mp_config_acpi_legacy_irqs() sees a | |
404 | * nr_legacy_irqs() of 0, so it doesn't initialize the array 'mp_irqs[]', and | |
405 | * later setup_IO_APIC_irqs() -> find_irq_entry() fails to find the legacy irqs | |
406 | * from the array and hence doesn't create the necessary irq description info. | |
407 | * | |
408 | * Clone arch/x86/kernel/acpi/boot.c: acpi_generic_reduced_hw_init() here, | |
409 | * except don't change 'legacy_pic', which keeps its default value | |
410 | * 'default_legacy_pic'. This way, mp_config_acpi_legacy_irqs() sees a non-zero | |
411 | * nr_legacy_irqs() and eventually serial console interrupts work properly. | |
412 | */ | |
413 | static void __init reduced_hw_init(void) | |
414 | { | |
415 | x86_init.timers.timer_init = x86_init_noop; | |
416 | x86_init.irqs.pre_vector_init = x86_init_noop; | |
417 | } | |
418 | ||
410779d8 NDN |
419 | int hv_get_hypervisor_version(union hv_hypervisor_version_info *info) |
420 | { | |
421 | unsigned int hv_max_functions; | |
422 | ||
423 | hv_max_functions = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS); | |
424 | if (hv_max_functions < HYPERV_CPUID_VERSION) { | |
425 | pr_err("%s: Could not detect Hyper-V version\n", __func__); | |
426 | return -ENODEV; | |
427 | } | |
428 | ||
429 | cpuid(HYPERV_CPUID_VERSION, &info->eax, &info->ebx, &info->ecx, &info->edx); | |
430 | ||
431 | return 0; | |
432 | } | |
21050f61 | 433 | EXPORT_SYMBOL_GPL(hv_get_hypervisor_version); |
410779d8 | 434 | |
e08cae41 | 435 | static void __init ms_hyperv_init_platform(void) |
a2a47c6c | 436 | { |
a6c776a9 | 437 | int hv_max_functions_eax; |
8de8af7e | 438 | |
f7c0f50f AP |
439 | #ifdef CONFIG_PARAVIRT |
440 | pv_info.name = "Hyper-V"; | |
441 | #endif | |
442 | ||
a2a47c6c | 443 | /* |
e08cae41 | 444 | * Extract the features and hints |
a2a47c6c | 445 | */ |
e08cae41 | 446 | ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); |
6dc2a774 | 447 | ms_hyperv.priv_high = cpuid_ebx(HYPERV_CPUID_FEATURES); |
8cac5179 | 448 | ms_hyperv.ext_features = cpuid_ecx(HYPERV_CPUID_FEATURES); |
cc2dd402 | 449 | ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); |
e08cae41 | 450 | ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); |
a2a47c6c | 451 | |
a6c776a9 VP |
452 | hv_max_functions_eax = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS); |
453 | ||
8cac5179 SK |
454 | pr_info("Hyper-V: privilege flags low %#x, high %#x, ext %#x, hints %#x, misc %#x\n", |
455 | ms_hyperv.features, ms_hyperv.priv_high, | |
456 | ms_hyperv.ext_features, ms_hyperv.hints, | |
6dc2a774 | 457 | ms_hyperv.misc_features); |
6f4151c8 | 458 | |
415bd1cd VK |
459 | ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); |
460 | ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); | |
dd018597 VK |
461 | |
462 | pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n", | |
463 | ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); | |
464 | ||
db912b89 | 465 | hv_identify_partition_type(); |
e9977202 | 466 | |
c4bdf94f JJ |
467 | if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) { |
468 | hv_nested = true; | |
469 | pr_info("Hyper-V: running on a nested hypervisor\n"); | |
470 | } | |
471 | ||
e1471463 | 472 | if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && |
71c2a2d0 VK |
473 | ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { |
474 | x86_platform.calibrate_tsc = hv_get_tsc_khz; | |
475 | x86_platform.calibrate_cpu = hv_get_tsc_khz; | |
8fcc5148 | 476 | setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); |
71c2a2d0 VK |
477 | } |
478 | ||
6dc2a774 | 479 | if (ms_hyperv.priv_high & HV_ISOLATION) { |
a6c76bb0 APM |
480 | ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG); |
481 | ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG); | |
812b0597 MK |
482 | |
483 | if (ms_hyperv.shared_gpa_boundary_active) | |
484 | ms_hyperv.shared_gpa_boundary = | |
485 | BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); | |
a6c76bb0 | 486 | |
d3a9d7e4 DC |
487 | hyperv_paravisor_present = !!ms_hyperv.paravisor_present; |
488 | ||
a6c76bb0 APM |
489 | pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", |
490 | ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); | |
0cc4f6d9 | 491 | |
d6e2d652 | 492 | |
86e619c9 | 493 | if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { |
0cc4f6d9 | 494 | static_branch_enable(&isolation_type_snp); |
08e9d120 DC |
495 | } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) { |
496 | static_branch_enable(&isolation_type_tdx); | |
68f2f2bc DC |
497 | |
498 | /* A TDX VM must use x2APIC and doesn't use lazy EOI. */ | |
499 | ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED; | |
500 | ||
501 | if (!ms_hyperv.paravisor_present) { | |
7f828d5f DC |
502 | /* |
503 | * Mark the Hyper-V TSC page feature as disabled | |
504 | * in a TDX VM without paravisor so that the | |
505 | * Invariant TSC, which is a better clocksource | |
506 | * anyway, is used instead. | |
507 | */ | |
68f2f2bc DC |
508 | ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE; |
509 | ||
7f828d5f DC |
510 | /* |
511 | * The Invariant TSC is expected to be available | |
512 | * in a TDX VM without paravisor, but if not, | |
513 | * print a warning message. The slower Hyper-V MSR-based | |
514 | * Ref Counter should end up being the clocksource. | |
515 | */ | |
516 | if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) | |
517 | pr_warn("Hyper-V: Invariant TSC is unavailable\n"); | |
518 | ||
b967df62 | 519 | /* HV_MSR_CRASH_CTL is unsupported. */ |
68f2f2bc DC |
520 | ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; |
521 | ||
522 | /* Don't trust Hyper-V's TLB-flushing hypercalls. */ | |
523 | ms_hyperv.hints &= ~HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; | |
0719881b DC |
524 | |
525 | x86_init.acpi.reduced_hw_early_init = reduced_hw_init; | |
68f2f2bc | 526 | } |
d6e2d652 | 527 | } |
a6c76bb0 APM |
528 | } |
529 | ||
a6c776a9 | 530 | if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { |
5431390b VK |
531 | ms_hyperv.nested_features = |
532 | cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); | |
a6c776a9 VP |
533 | pr_info("Hyper-V: Nested features: 0x%x\n", |
534 | ms_hyperv.nested_features); | |
5431390b VK |
535 | } |
536 | ||
90ab9d55 | 537 | #ifdef CONFIG_X86_LOCAL_APIC |
e1471463 | 538 | if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && |
2cf02842 | 539 | ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { |
9e7827b5 S |
540 | /* |
541 | * Get the APIC frequency. | |
542 | */ | |
4c08edd3 PA |
543 | u64 hv_lapic_frequency; |
544 | ||
c435e608 | 545 | rdmsrq(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); |
9e7827b5 | 546 | hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ); |
52ae346b | 547 | lapic_timer_period = hv_lapic_frequency; |
dd018597 | 548 | pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n", |
52ae346b | 549 | lapic_timer_period); |
9e7827b5 | 550 | } |
59107e2f VK |
551 | |
552 | register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST, | |
553 | "hv_nmi_unknown"); | |
90ab9d55 | 554 | #endif |
9e7827b5 | 555 | |
ca3ba2a2 JW |
556 | #ifdef CONFIG_X86_IO_APIC |
557 | no_timer_check = 1; | |
558 | #endif | |
559 | ||
a4eeb217 BH |
560 | #if IS_ENABLED(CONFIG_HYPERV) |
561 | #if defined(CONFIG_KEXEC_CORE) | |
2517281d | 562 | machine_ops.shutdown = hv_machine_shutdown; |
a4eeb217 BH |
563 | #endif |
564 | #if defined(CONFIG_CRASH_DUMP) | |
b4370df2 | 565 | machine_ops.crash_shutdown = hv_machine_crash_shutdown; |
a4eeb217 | 566 | #endif |
1e034743 | 567 | #endif |
e1471463 | 568 | if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { |
5f92b45c AS |
569 | /* |
570 | * Writing to synthetic MSR 0x40000118 updates/changes the | |
571 | * guest visible CPUIDs. Setting bit 0 of this MSR enables | |
572 | * guests to report invariant TSC feature through CPUID | |
573 | * instruction, CPUID 0x800000007/EDX, bit 8. See code in | |
574 | * early_init_intel() where this bit is examined. The | |
575 | * setting of this MSR bit should happen before init_intel() | |
576 | * is called. | |
577 | */ | |
78255eb2 | 578 | wrmsrq(HV_X64_MSR_TSC_INVARIANT_CONTROL, HV_EXPOSE_INVARIANT_TSC); |
dce7cd62 | 579 | setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); |
dce7cd62 | 580 | } |
1e2ae9ec VK |
581 | |
582 | /* | |
583 | * Generation 2 instances don't support reading the NMI status from | |
584 | * 0x61 port. | |
585 | */ | |
586 | if (efi_enabled(EFI_BOOT)) | |
587 | x86_platform.get_nmi_reason = hv_get_nmi_reason; | |
8730046c S |
588 | |
589 | #if IS_ENABLED(CONFIG_HYPERV) | |
812b0597 | 590 | if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || |
d3a9d7e4 | 591 | ms_hyperv.paravisor_present) |
812b0597 | 592 | hv_vtom_init(); |
8730046c S |
593 | /* |
594 | * Setup the hook to get control post apic initialization. | |
595 | */ | |
596 | x86_platform.apic_post_init = hyperv_init; | |
2ffd9e33 | 597 | hyperv_setup_mmu_ops(); |
93286261 | 598 | |
8f4a29b0 XL |
599 | /* Install system interrupt handler for hypervisor callback */ |
600 | sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); | |
601 | ||
602 | /* Install system interrupt handler for reenlightenment notifications */ | |
e1471463 | 603 | if (ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT) { |
8f4a29b0 | 604 | sysvec_install(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); |
a16be368 | 605 | } |
93286261 | 606 | |
8f4a29b0 | 607 | /* Install system interrupt handler for stimer0 */ |
a16be368 | 608 | if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) { |
8f4a29b0 | 609 | sysvec_install(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); |
a16be368 | 610 | } |
3a025de6 YS |
611 | |
612 | # ifdef CONFIG_SMP | |
613 | smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; | |
db912b89 | 614 | if (hv_root_partition() || |
e3131f1c | 615 | (!ms_hyperv.paravisor_present && hv_isolation_type_snp())) |
333abaf5 | 616 | smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus; |
3a025de6 | 617 | # endif |
84fdfafa LT |
618 | |
619 | /* | |
620 | * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic, | |
d9f6e12f | 621 | * set x2apic destination mode to physical mode when x2apic is available |
84fdfafa LT |
622 | * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs |
623 | * have 8-bit APIC id. | |
624 | */ | |
625 | # ifdef CONFIG_X86_X2APIC | |
626 | if (x2apic_supported()) | |
627 | x2apic_phys = 1; | |
628 | # endif | |
629 | ||
bd00cd52 TL |
630 | /* Register Hyper-V specific clocksource */ |
631 | hv_init_clocksource(); | |
bcc80dec | 632 | x86_setup_ops_for_tsc_pg_clock(); |
3be1bc2f | 633 | hv_vtl_init_platform(); |
8730046c | 634 | #endif |
c445535c AS |
635 | /* |
636 | * TSC should be marked as unstable only after Hyper-V | |
637 | * clocksource has been initialized. This ensures that the | |
638 | * stability of the sched_clock is not altered. | |
639 | */ | |
640 | if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) | |
641 | mark_tsc_unstable("running on Hyper-V"); | |
f1f8288d MK |
642 | |
643 | hardlockup_detector_disable(); | |
a2a47c6c | 644 | } |
e08cae41 | 645 | |
d981059e DC |
646 | static bool __init ms_hyperv_x2apic_available(void) |
647 | { | |
648 | return x2apic_supported(); | |
649 | } | |
650 | ||
651 | /* | |
652 | * If ms_hyperv_msi_ext_dest_id() returns true, hyperv_prepare_irq_remapping() | |
653 | * returns -ENODEV and the Hyper-V IOMMU driver is not used; instead, the | |
654 | * generic support of the 15-bit APIC ID is used: see __irq_msi_compose_msg(). | |
655 | * | |
656 | * Note: for a VM on Hyper-V, the I/O-APIC is the only device which | |
657 | * (logically) generates MSIs directly to the system APIC irq domain. | |
658 | * There is no HPET, and PCI MSI/MSI-X interrupts are remapped by the | |
659 | * pci-hyperv host bridge. | |
fea858dc NDN |
660 | * |
661 | * Note: for a Hyper-V root partition, this will always return false. | |
662 | * The hypervisor doesn't expose these HYPERV_CPUID_VIRT_STACK_* cpuids by | |
663 | * default, they are implemented as intercepts by the Windows Hyper-V stack. | |
664 | * Even a nested root partition (L2 root) will not get them because the | |
665 | * nested (L1) hypervisor filters them out. | |
d981059e DC |
666 | */ |
667 | static bool __init ms_hyperv_msi_ext_dest_id(void) | |
668 | { | |
669 | u32 eax; | |
670 | ||
671 | eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_INTERFACE); | |
672 | if (eax != HYPERV_VS_INTERFACE_EAX_SIGNATURE) | |
673 | return false; | |
674 | ||
675 | eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_PROPERTIES); | |
676 | return eax & HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE; | |
677 | } | |
678 | ||
4754ec7f TL |
679 | #ifdef CONFIG_AMD_MEM_ENCRYPT |
680 | static void hv_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs) | |
681 | { | |
682 | /* RAX and CPL are already in the GHCB */ | |
683 | ghcb_set_rcx(ghcb, regs->cx); | |
684 | ghcb_set_rdx(ghcb, regs->dx); | |
685 | ghcb_set_r8(ghcb, regs->r8); | |
686 | } | |
687 | ||
/*
 * Always reports success: no checking of the return state is needed,
 * so neither @ghcb nor @regs is inspected.
 */
static bool hv_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
{
	return true;
}
693 | #endif | |
694 | ||
03b2a320 | 695 | const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { |
dd018597 | 696 | .name = "Microsoft Hyper-V", |
e08cae41 | 697 | .detect = ms_hyperv_platform, |
03b2a320 | 698 | .type = X86_HYPER_MS_HYPERV, |
d981059e DC |
699 | .init.x2apic_available = ms_hyperv_x2apic_available, |
700 | .init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id, | |
f72e38e8 | 701 | .init.init_platform = ms_hyperv_init_platform, |
f2580a90 | 702 | .init.guest_late_init = ms_hyperv_late_init, |
4754ec7f TL |
703 | #ifdef CONFIG_AMD_MEM_ENCRYPT |
704 | .runtime.sev_es_hcall_prepare = hv_sev_es_hcall_prepare, | |
705 | .runtime.sev_es_hcall_finish = hv_sev_es_hcall_finish, | |
706 | #endif | |
e08cae41 | 707 | }; |