Commit | Line | Data |
---|---|---|
fd1fea68 MK |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | /* | |
4 | * Clocksource driver for the synthetic counter and timers | |
5 | * provided by the Hyper-V hypervisor to guest VMs, as described | |
6 | * in the Hyper-V Top Level Functional Spec (TLFS). This driver | |
7 | * is instruction set architecture independent. | |
8 | * | |
9 | * Copyright (C) 2019, Microsoft, Inc. | |
10 | * | |
11 | * Author: Michael Kelley <mikelley@microsoft.com> | |
12 | */ | |
13 | ||
14 | #include <linux/percpu.h> | |
15 | #include <linux/cpumask.h> | |
16 | #include <linux/clockchips.h> | |
dd2cb348 MK |
17 | #include <linux/clocksource.h> |
18 | #include <linux/sched_clock.h> | |
fd1fea68 | 19 | #include <linux/mm.h> |
4df4cb9e | 20 | #include <linux/cpuhotplug.h> |
ec866be6 MK |
21 | #include <linux/interrupt.h> |
22 | #include <linux/irq.h> | |
23 | #include <linux/acpi.h> | |
4ad1aa57 | 24 | #include <linux/hyperv.h> |
fd1fea68 MK |
25 | #include <clocksource/hyperv_timer.h> |
26 | #include <asm/hyperv-tlfs.h> | |
27 | #include <asm/mshyperv.h> | |
28 | ||
29 | static struct clock_event_device __percpu *hv_clock_event; | |
bd00cd52 | 30 | static u64 hv_sched_clock_offset __ro_after_init; |
fd1fea68 MK |
31 | |
32 | /* | |
33 | * If false, we're using the old mechanism for stimer0 interrupts | |
34 | * where it sends a VMbus message when it expires. The old | |
35 | * mechanism is used when running on older versions of Hyper-V | |
36 | * that don't support Direct Mode. While Hyper-V provides | |
37 | * four stimer's per CPU, Linux uses only stimer0. | |
4df4cb9e MK |
38 | * |
39 | * Because Direct Mode does not require processing a VMbus | |
40 | * message, stimer interrupts can be enabled earlier in the | |
41 | * process of booting a CPU, and consistent with when timer | |
42 | * interrupts are enabled for other clocksource drivers. | |
43 | * However, for legacy versions of Hyper-V when Direct Mode | |
44 | * is not enabled, setting up stimer interrupts must be | |
45 | * delayed until VMbus is initialized and can process the | |
46 | * interrupt message. | |
fd1fea68 MK |
47 | */ |
48 | static bool direct_mode_enabled; | |
49 | ||
ec866be6 | 50 | static int stimer0_irq = -1; |
fd1fea68 | 51 | static int stimer0_message_sint; |
ec866be6 | 52 | static DEFINE_PER_CPU(long, stimer0_evt); |
fd1fea68 MK |
53 | |
54 | /* | |
ec866be6 MK |
55 | * Common code for stimer0 interrupts coming via Direct Mode or |
56 | * as a VMbus message. | |
fd1fea68 MK |
57 | */ |
58 | void hv_stimer0_isr(void) | |
59 | { | |
60 | struct clock_event_device *ce; | |
61 | ||
62 | ce = this_cpu_ptr(hv_clock_event); | |
63 | ce->event_handler(ce); | |
64 | } | |
65 | EXPORT_SYMBOL_GPL(hv_stimer0_isr); | |
66 | ||
ec866be6 MK |
67 | /* |
68 | * stimer0 interrupt handler for architectures that support | |
69 | * per-cpu interrupts, which also implies Direct Mode. | |
70 | */ | |
71 | static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id) | |
72 | { | |
73 | hv_stimer0_isr(); | |
74 | return IRQ_HANDLED; | |
75 | } | |
76 | ||
fd1fea68 MK |
77 | static int hv_ce_set_next_event(unsigned long delta, |
78 | struct clock_event_device *evt) | |
79 | { | |
80 | u64 current_tick; | |
81 | ||
0af3e137 | 82 | current_tick = hv_read_reference_counter(); |
fd1fea68 | 83 | current_tick += delta; |
f3c5e63c | 84 | hv_set_register(HV_REGISTER_STIMER0_COUNT, current_tick); |
fd1fea68 MK |
85 | return 0; |
86 | } | |
87 | ||
88 | static int hv_ce_shutdown(struct clock_event_device *evt) | |
89 | { | |
f3c5e63c MK |
90 | hv_set_register(HV_REGISTER_STIMER0_COUNT, 0); |
91 | hv_set_register(HV_REGISTER_STIMER0_CONFIG, 0); | |
ec866be6 MK |
92 | if (direct_mode_enabled && stimer0_irq >= 0) |
93 | disable_percpu_irq(stimer0_irq); | |
fd1fea68 MK |
94 | |
95 | return 0; | |
96 | } | |
97 | ||
98 | static int hv_ce_set_oneshot(struct clock_event_device *evt) | |
99 | { | |
100 | union hv_stimer_config timer_cfg; | |
101 | ||
102 | timer_cfg.as_uint64 = 0; | |
103 | timer_cfg.enable = 1; | |
104 | timer_cfg.auto_enable = 1; | |
105 | if (direct_mode_enabled) { | |
106 | /* | |
107 | * When it expires, the timer will directly interrupt | |
108 | * on the specified hardware vector/IRQ. | |
109 | */ | |
110 | timer_cfg.direct_mode = 1; | |
ec866be6 MK |
111 | timer_cfg.apic_vector = HYPERV_STIMER0_VECTOR; |
112 | if (stimer0_irq >= 0) | |
113 | enable_percpu_irq(stimer0_irq, IRQ_TYPE_NONE); | |
fd1fea68 MK |
114 | } else { |
115 | /* | |
116 | * When it expires, the timer will generate a VMbus message, | |
117 | * to be handled by the normal VMbus interrupt handler. | |
118 | */ | |
119 | timer_cfg.direct_mode = 0; | |
120 | timer_cfg.sintx = stimer0_message_sint; | |
121 | } | |
f3c5e63c | 122 | hv_set_register(HV_REGISTER_STIMER0_CONFIG, timer_cfg.as_uint64); |
fd1fea68 MK |
123 | return 0; |
124 | } | |
125 | ||
126 | /* | |
127 | * hv_stimer_init - Per-cpu initialization of the clockevent | |
128 | */ | |
4df4cb9e | 129 | static int hv_stimer_init(unsigned int cpu) |
fd1fea68 MK |
130 | { |
131 | struct clock_event_device *ce; | |
132 | ||
4df4cb9e MK |
133 | if (!hv_clock_event) |
134 | return 0; | |
fd1fea68 MK |
135 | |
136 | ce = per_cpu_ptr(hv_clock_event, cpu); | |
137 | ce->name = "Hyper-V clockevent"; | |
138 | ce->features = CLOCK_EVT_FEAT_ONESHOT; | |
139 | ce->cpumask = cpumask_of(cpu); | |
140 | ce->rating = 1000; | |
141 | ce->set_state_shutdown = hv_ce_shutdown; | |
142 | ce->set_state_oneshot = hv_ce_set_oneshot; | |
143 | ce->set_next_event = hv_ce_set_next_event; | |
144 | ||
145 | clockevents_config_and_register(ce, | |
146 | HV_CLOCK_HZ, | |
147 | HV_MIN_DELTA_TICKS, | |
148 | HV_MAX_MAX_DELTA_TICKS); | |
4df4cb9e | 149 | return 0; |
fd1fea68 | 150 | } |
fd1fea68 MK |
151 | |
152 | /* | |
153 | * hv_stimer_cleanup - Per-cpu cleanup of the clockevent | |
154 | */ | |
4df4cb9e | 155 | int hv_stimer_cleanup(unsigned int cpu) |
fd1fea68 MK |
156 | { |
157 | struct clock_event_device *ce; | |
158 | ||
4df4cb9e MK |
159 | if (!hv_clock_event) |
160 | return 0; | |
161 | ||
162 | /* | |
163 | * In the legacy case where Direct Mode is not enabled | |
164 | * (which can only be on x86/64), stimer cleanup happens | |
165 | * relatively early in the CPU offlining process. We | |
166 | * must unbind the stimer-based clockevent device so | |
167 | * that the LAPIC timer can take over until clockevents | |
168 | * are no longer needed in the offlining process. Note | |
169 | * that clockevents_unbind_device() eventually calls | |
170 | * hv_ce_shutdown(). | |
171 | * | |
172 | * The unbind should not be done when Direct Mode is | |
173 | * enabled because we may be on an architecture where | |
174 | * there are no other clockevent devices to fallback to. | |
175 | */ | |
176 | ce = per_cpu_ptr(hv_clock_event, cpu); | |
177 | if (direct_mode_enabled) | |
fd1fea68 | 178 | hv_ce_shutdown(ce); |
4df4cb9e MK |
179 | else |
180 | clockevents_unbind_device(ce, cpu); | |
181 | ||
182 | return 0; | |
fd1fea68 MK |
183 | } |
184 | EXPORT_SYMBOL_GPL(hv_stimer_cleanup); | |
185 | ||
ec866be6 MK |
186 | /* |
187 | * These placeholders are overridden by arch specific code on | |
188 | * architectures that need special setup of the stimer0 IRQ because | |
189 | * they don't support per-cpu IRQs (such as x86/x64). | |
190 | */ | |
191 | void __weak hv_setup_stimer0_handler(void (*handler)(void)) | |
192 | { | |
193 | }; | |
194 | ||
195 | void __weak hv_remove_stimer0_handler(void) | |
196 | { | |
197 | }; | |
198 | ||
199 | /* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */ | |
200 | static int hv_setup_stimer0_irq(void) | |
201 | { | |
202 | int ret; | |
203 | ||
204 | ret = acpi_register_gsi(NULL, HYPERV_STIMER0_VECTOR, | |
205 | ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH); | |
206 | if (ret < 0) { | |
207 | pr_err("Can't register Hyper-V stimer0 GSI. Error %d", ret); | |
208 | return ret; | |
209 | } | |
210 | stimer0_irq = ret; | |
211 | ||
212 | ret = request_percpu_irq(stimer0_irq, hv_stimer0_percpu_isr, | |
213 | "Hyper-V stimer0", &stimer0_evt); | |
214 | if (ret) { | |
215 | pr_err("Can't request Hyper-V stimer0 IRQ %d. Error %d", | |
216 | stimer0_irq, ret); | |
217 | acpi_unregister_gsi(stimer0_irq); | |
218 | stimer0_irq = -1; | |
219 | } | |
220 | return ret; | |
221 | } | |
222 | ||
223 | static void hv_remove_stimer0_irq(void) | |
224 | { | |
225 | if (stimer0_irq == -1) { | |
226 | hv_remove_stimer0_handler(); | |
227 | } else { | |
228 | free_percpu_irq(stimer0_irq, &stimer0_evt); | |
229 | acpi_unregister_gsi(stimer0_irq); | |
230 | stimer0_irq = -1; | |
231 | } | |
232 | } | |
233 | ||
fd1fea68 | 234 | /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */ |
ec866be6 | 235 | int hv_stimer_alloc(bool have_percpu_irqs) |
fd1fea68 | 236 | { |
ec866be6 | 237 | int ret; |
4df4cb9e MK |
238 | |
239 | /* | |
240 | * Synthetic timers are always available except on old versions of | |
241 | * Hyper-V on x86. In that case, return as error as Linux will use a | |
242 | * clockevent based on emulated LAPIC timer hardware. | |
243 | */ | |
244 | if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)) | |
245 | return -EINVAL; | |
fd1fea68 MK |
246 | |
247 | hv_clock_event = alloc_percpu(struct clock_event_device); | |
248 | if (!hv_clock_event) | |
249 | return -ENOMEM; | |
250 | ||
251 | direct_mode_enabled = ms_hyperv.misc_features & | |
252 | HV_STIMER_DIRECT_MODE_AVAILABLE; | |
ec866be6 MK |
253 | |
254 | /* | |
255 | * If Direct Mode isn't enabled, the remainder of the initialization | |
256 | * is done later by hv_stimer_legacy_init() | |
257 | */ | |
258 | if (!direct_mode_enabled) | |
259 | return 0; | |
260 | ||
261 | if (have_percpu_irqs) { | |
262 | ret = hv_setup_stimer0_irq(); | |
4df4cb9e | 263 | if (ret) |
ec866be6 MK |
264 | goto free_clock_event; |
265 | } else { | |
266 | hv_setup_stimer0_handler(hv_stimer0_isr); | |
267 | } | |
4df4cb9e | 268 | |
ec866be6 MK |
269 | /* |
270 | * Since we are in Direct Mode, stimer initialization | |
271 | * can be done now with a CPUHP value in the same range | |
272 | * as other clockevent devices. | |
273 | */ | |
274 | ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING, | |
275 | "clockevents/hyperv/stimer:starting", | |
276 | hv_stimer_init, hv_stimer_cleanup); | |
277 | if (ret < 0) { | |
278 | hv_remove_stimer0_irq(); | |
279 | goto free_clock_event; | |
fd1fea68 | 280 | } |
4df4cb9e | 281 | return ret; |
fd1fea68 | 282 | |
ec866be6 | 283 | free_clock_event: |
4df4cb9e MK |
284 | free_percpu(hv_clock_event); |
285 | hv_clock_event = NULL; | |
286 | return ret; | |
fd1fea68 MK |
287 | } |
288 | EXPORT_SYMBOL_GPL(hv_stimer_alloc); | |
289 | ||
4df4cb9e MK |
290 | /* |
291 | * hv_stimer_legacy_init -- Called from the VMbus driver to handle | |
292 | * the case when Direct Mode is not enabled, and the stimer | |
293 | * must be initialized late in the CPU onlining process. | |
294 | * | |
295 | */ | |
296 | void hv_stimer_legacy_init(unsigned int cpu, int sint) | |
297 | { | |
298 | if (direct_mode_enabled) | |
299 | return; | |
300 | ||
301 | /* | |
302 | * This function gets called by each vCPU, so setting the | |
303 | * global stimer_message_sint value each time is conceptually | |
304 | * not ideal, but the value passed in is always the same and | |
305 | * it avoids introducing yet another interface into this | |
306 | * clocksource driver just to set the sint in the legacy case. | |
307 | */ | |
308 | stimer0_message_sint = sint; | |
309 | (void)hv_stimer_init(cpu); | |
310 | } | |
311 | EXPORT_SYMBOL_GPL(hv_stimer_legacy_init); | |
312 | ||
313 | /* | |
314 | * hv_stimer_legacy_cleanup -- Called from the VMbus driver to | |
315 | * handle the case when Direct Mode is not enabled, and the | |
316 | * stimer must be cleaned up early in the CPU offlining | |
317 | * process. | |
318 | */ | |
319 | void hv_stimer_legacy_cleanup(unsigned int cpu) | |
320 | { | |
321 | if (direct_mode_enabled) | |
322 | return; | |
323 | (void)hv_stimer_cleanup(cpu); | |
324 | } | |
325 | EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup); | |
326 | ||
fd1fea68 MK |
327 | /* |
328 | * Do a global cleanup of clockevents for the cases of kexec and | |
329 | * vmbus exit | |
330 | */ | |
331 | void hv_stimer_global_cleanup(void) | |
332 | { | |
333 | int cpu; | |
fd1fea68 | 334 | |
4df4cb9e MK |
335 | /* |
336 | * hv_stime_legacy_cleanup() will stop the stimer if Direct | |
337 | * Mode is not enabled, and fallback to the LAPIC timer. | |
338 | */ | |
339 | for_each_present_cpu(cpu) { | |
340 | hv_stimer_legacy_cleanup(cpu); | |
fd1fea68 | 341 | } |
4df4cb9e | 342 | |
ec866be6 MK |
343 | if (!hv_clock_event) |
344 | return; | |
345 | ||
346 | if (direct_mode_enabled) { | |
347 | cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING); | |
348 | hv_remove_stimer0_irq(); | |
349 | stimer0_irq = -1; | |
350 | } | |
351 | free_percpu(hv_clock_event); | |
352 | hv_clock_event = NULL; | |
353 | ||
fd1fea68 MK |
354 | } |
355 | EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); | |
dd2cb348 MK |
356 | |
357 | /* | |
358 | * Code and definitions for the Hyper-V clocksources. Two | |
359 | * clocksources are defined: one that reads the Hyper-V defined MSR, and | |
360 | * the other that uses the TSC reference page feature as defined in the | |
361 | * TLFS. The MSR version is for compatibility with old versions of | |
362 | * Hyper-V and 32-bit x86. The TSC reference page version is preferred. | |
363 | */ | |
364 | ||
ddc61bbc BF |
365 | static union { |
366 | struct ms_hyperv_tsc_page page; | |
367 | u8 reserved[PAGE_SIZE]; | |
368 | } tsc_pg __aligned(PAGE_SIZE); | |
dd2cb348 | 369 | |
76be6331 | 370 | static struct ms_hyperv_tsc_page *tsc_page = &tsc_pg.page; |
e1f5c66d SK |
371 | static unsigned long tsc_pfn; |
372 | ||
364adc45 | 373 | unsigned long hv_get_tsc_pfn(void) |
e1f5c66d SK |
374 | { |
375 | return tsc_pfn; | |
376 | } | |
364adc45 | 377 | EXPORT_SYMBOL_GPL(hv_get_tsc_pfn); |
76be6331 | 378 | |
dd2cb348 MK |
379 | struct ms_hyperv_tsc_page *hv_get_tsc_page(void) |
380 | { | |
76be6331 | 381 | return tsc_page; |
dd2cb348 MK |
382 | } |
383 | EXPORT_SYMBOL_GPL(hv_get_tsc_page); | |
384 | ||
0af3e137 | 385 | static u64 notrace read_hv_clock_tsc(void) |
dd2cb348 | 386 | { |
ddc61bbc | 387 | u64 current_tick = hv_read_tsc_page(hv_get_tsc_page()); |
dd2cb348 MK |
388 | |
389 | if (current_tick == U64_MAX) | |
f3c5e63c | 390 | current_tick = hv_get_register(HV_REGISTER_TIME_REF_COUNT); |
dd2cb348 MK |
391 | |
392 | return current_tick; | |
393 | } | |
394 | ||
0af3e137 AP |
395 | static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg) |
396 | { | |
397 | return read_hv_clock_tsc(); | |
398 | } | |
399 | ||
1f3aed01 | 400 | static u64 notrace read_hv_sched_clock_tsc(void) |
dd2cb348 | 401 | { |
749da8ca YX |
402 | return (read_hv_clock_tsc() - hv_sched_clock_offset) * |
403 | (NSEC_PER_SEC / HV_CLOCK_HZ); | |
dd2cb348 MK |
404 | } |
405 | ||
1349401f DC |
406 | static void suspend_hv_clock_tsc(struct clocksource *arg) |
407 | { | |
4ad1aa57 | 408 | union hv_reference_tsc_msr tsc_msr; |
1349401f DC |
409 | |
410 | /* Disable the TSC page */ | |
4ad1aa57 AR |
411 | tsc_msr.as_uint64 = hv_get_register(HV_REGISTER_REFERENCE_TSC); |
412 | tsc_msr.enable = 0; | |
413 | hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64); | |
1349401f DC |
414 | } |
415 | ||
416 | ||
417 | static void resume_hv_clock_tsc(struct clocksource *arg) | |
418 | { | |
4ad1aa57 | 419 | union hv_reference_tsc_msr tsc_msr; |
1349401f DC |
420 | |
421 | /* Re-enable the TSC page */ | |
4ad1aa57 AR |
422 | tsc_msr.as_uint64 = hv_get_register(HV_REGISTER_REFERENCE_TSC); |
423 | tsc_msr.enable = 1; | |
e1f5c66d | 424 | tsc_msr.pfn = tsc_pfn; |
4ad1aa57 | 425 | hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64); |
1349401f DC |
426 | } |
427 | ||
#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
/*
 * Clocksource .enable callback: mark VDSO_CLOCKMODE_HVCLOCK as used.
 * Always returns 0; @cs is unused.
 */
static int hv_cs_enable(struct clocksource *cs)
{
	vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK);

	return 0;
}
#endif
eec399dd | 435 | |
dd2cb348 MK |
436 | static struct clocksource hyperv_cs_tsc = { |
437 | .name = "hyperv_clocksource_tsc_page", | |
4c78738e | 438 | .rating = 500, |
0af3e137 | 439 | .read = read_hv_clock_tsc_cs, |
dd2cb348 MK |
440 | .mask = CLOCKSOURCE_MASK(64), |
441 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | |
1349401f DC |
442 | .suspend= suspend_hv_clock_tsc, |
443 | .resume = resume_hv_clock_tsc, | |
3486d2c9 | 444 | #ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK |
eec399dd | 445 | .enable = hv_cs_enable, |
e4ab4658 MK |
446 | .vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK, |
447 | #else | |
448 | .vdso_clock_mode = VDSO_CLOCKMODE_NONE, | |
449 | #endif | |
dd2cb348 | 450 | }; |
dd2cb348 | 451 | |
0af3e137 | 452 | static u64 notrace read_hv_clock_msr(void) |
dd2cb348 | 453 | { |
dd2cb348 MK |
454 | /* |
455 | * Read the partition counter to get the current tick count. This count | |
456 | * is set to 0 when the partition is created and is incremented in | |
457 | * 100 nanosecond units. | |
458 | */ | |
f3c5e63c | 459 | return hv_get_register(HV_REGISTER_TIME_REF_COUNT); |
dd2cb348 MK |
460 | } |
461 | ||
0af3e137 AP |
462 | static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg) |
463 | { | |
464 | return read_hv_clock_msr(); | |
465 | } | |
466 | ||
1f3aed01 | 467 | static u64 notrace read_hv_sched_clock_msr(void) |
dd2cb348 | 468 | { |
749da8ca YX |
469 | return (read_hv_clock_msr() - hv_sched_clock_offset) * |
470 | (NSEC_PER_SEC / HV_CLOCK_HZ); | |
dd2cb348 MK |
471 | } |
472 | ||
473 | static struct clocksource hyperv_cs_msr = { | |
474 | .name = "hyperv_clocksource_msr", | |
4c78738e | 475 | .rating = 500, |
0af3e137 | 476 | .read = read_hv_clock_msr_cs, |
dd2cb348 MK |
477 | .mask = CLOCKSOURCE_MASK(64), |
478 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | |
479 | }; | |
480 | ||
eb3e1d37 MK |
481 | /* |
482 | * Reference to pv_ops must be inline so objtool | |
483 | * detection of noinstr violations can work correctly. | |
484 | */ | |
485 | #ifdef CONFIG_GENERIC_SCHED_CLOCK | |
486 | static __always_inline void hv_setup_sched_clock(void *sched_clock) | |
487 | { | |
488 | /* | |
489 | * We're on an architecture with generic sched clock (not x86/x64). | |
490 | * The Hyper-V sched clock read function returns nanoseconds, not | |
491 | * the normal 100ns units of the Hyper-V synthetic clock. | |
492 | */ | |
493 | sched_clock_register(sched_clock, 64, NSEC_PER_SEC); | |
494 | } | |
495 | #elif defined CONFIG_PARAVIRT | |
496 | static __always_inline void hv_setup_sched_clock(void *sched_clock) | |
497 | { | |
498 | /* We're on x86/x64 *and* using PV ops */ | |
4d480dbf | 499 | paravirt_set_sched_clock(sched_clock); |
eb3e1d37 MK |
500 | } |
501 | #else /* !CONFIG_GENERIC_SCHED_CLOCK && !CONFIG_PARAVIRT */ | |
502 | static __always_inline void hv_setup_sched_clock(void *sched_clock) {} | |
503 | #endif /* CONFIG_GENERIC_SCHED_CLOCK */ | |
504 | ||
dd2cb348 MK |
505 | static bool __init hv_init_tsc_clocksource(void) |
506 | { | |
4ad1aa57 | 507 | union hv_reference_tsc_msr tsc_msr; |
dd2cb348 MK |
508 | |
509 | if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) | |
510 | return false; | |
511 | ||
4c78738e MK |
512 | /* |
513 | * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly | |
514 | * handles frequency and offset changes due to live migration, | |
515 | * pause/resume, and other VM management operations. So lower the | |
516 | * Hyper-V Reference TSC rating, causing the generic TSC to be used. | |
517 | * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference | |
518 | * TSC will be preferred over the virtualized ARM64 arch counter. | |
ec866be6 MK |
519 | * While the Hyper-V MSR clocksource won't be used since the |
520 | * Reference TSC clocksource is present, change its rating as | |
521 | * well for consistency. | |
4c78738e | 522 | */ |
ec866be6 | 523 | if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { |
4c78738e | 524 | hyperv_cs_tsc.rating = 250; |
ec866be6 MK |
525 | hyperv_cs_msr.rating = 250; |
526 | } | |
4c78738e | 527 | |
0af3e137 | 528 | hv_read_reference_counter = read_hv_clock_tsc; |
dd2cb348 MK |
529 | |
530 | /* | |
0408f16b SK |
531 | * TSC page mapping works differently in root compared to guest. |
532 | * - In guest partition the guest PFN has to be passed to the | |
533 | * hypervisor. | |
534 | * - In root partition it's other way around: it has to map the PFN | |
535 | * provided by the hypervisor. | |
536 | * But it can't be mapped right here as it's too early and MMU isn't | |
537 | * ready yet. So, we only set the enable bit here and will remap the | |
538 | * page later in hv_remap_tsc_clocksource(). | |
539 | * | |
540 | * It worth mentioning, that TSC clocksource read function | |
541 | * (read_hv_clock_tsc) has a MSR-based fallback mechanism, used when | |
542 | * TSC page is zeroed (which is the case until the PFN is remapped) and | |
543 | * thus TSC clocksource will work even without the real TSC page | |
544 | * mapped. | |
dd2cb348 | 545 | */ |
4ad1aa57 | 546 | tsc_msr.as_uint64 = hv_get_register(HV_REGISTER_REFERENCE_TSC); |
0408f16b SK |
547 | if (hv_root_partition) |
548 | tsc_pfn = tsc_msr.pfn; | |
549 | else | |
550 | tsc_pfn = HVPFN_DOWN(virt_to_phys(tsc_page)); | |
4ad1aa57 | 551 | tsc_msr.enable = 1; |
e1f5c66d | 552 | tsc_msr.pfn = tsc_pfn; |
4ad1aa57 | 553 | hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64); |
dd2cb348 | 554 | |
dd2cb348 MK |
555 | clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); |
556 | ||
0af3e137 | 557 | hv_sched_clock_offset = hv_read_reference_counter(); |
bd00cd52 TL |
558 | hv_setup_sched_clock(read_hv_sched_clock_tsc); |
559 | ||
dd2cb348 MK |
560 | return true; |
561 | } | |
dd2cb348 MK |
562 | |
563 | void __init hv_init_clocksource(void) | |
564 | { | |
565 | /* | |
566 | * Try to set up the TSC page clocksource. If it succeeds, we're | |
4bf07f65 | 567 | * done. Otherwise, set up the MSR clocksource. At least one of |
dd2cb348 MK |
568 | * these will always be available except on very old versions of |
569 | * Hyper-V on x86. In that case we won't have a Hyper-V | |
570 | * clocksource, but Linux will still run with a clocksource based | |
571 | * on the emulated PIT or LAPIC timer. | |
572 | */ | |
573 | if (hv_init_tsc_clocksource()) | |
574 | return; | |
575 | ||
576 | if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)) | |
577 | return; | |
578 | ||
0af3e137 | 579 | hv_read_reference_counter = read_hv_clock_msr; |
dd2cb348 MK |
580 | clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); |
581 | ||
0af3e137 | 582 | hv_sched_clock_offset = hv_read_reference_counter(); |
bd00cd52 | 583 | hv_setup_sched_clock(read_hv_sched_clock_msr); |
dd2cb348 | 584 | } |
0408f16b SK |
585 | |
586 | void __init hv_remap_tsc_clocksource(void) | |
587 | { | |
588 | if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) | |
589 | return; | |
590 | ||
591 | if (!hv_root_partition) { | |
592 | WARN(1, "%s: attempt to remap TSC page in guest partition\n", | |
593 | __func__); | |
594 | return; | |
595 | } | |
596 | ||
597 | tsc_page = memremap(tsc_pfn << HV_HYP_PAGE_SHIFT, sizeof(tsc_pg), | |
598 | MEMREMAP_WB); | |
599 | if (!tsc_page) | |
600 | pr_err("Failed to remap Hyper-V TSC page.\n"); | |
601 | } |