Commit | Line | Data |
---|---|---|
fd1fea68 MK |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | /* | |
4 | * Clocksource driver for the synthetic counter and timers | |
5 | * provided by the Hyper-V hypervisor to guest VMs, as described | |
6 | * in the Hyper-V Top Level Functional Spec (TLFS). This driver | |
7 | * is instruction set architecture independent. | |
8 | * | |
9 | * Copyright (C) 2019, Microsoft, Inc. | |
10 | * | |
11 | * Author: Michael Kelley <mikelley@microsoft.com> | |
12 | */ | |
13 | ||
14 | #include <linux/percpu.h> | |
15 | #include <linux/cpumask.h> | |
16 | #include <linux/clockchips.h> | |
dd2cb348 MK |
17 | #include <linux/clocksource.h> |
18 | #include <linux/sched_clock.h> | |
fd1fea68 | 19 | #include <linux/mm.h> |
4df4cb9e | 20 | #include <linux/cpuhotplug.h> |
ec866be6 MK |
21 | #include <linux/interrupt.h> |
22 | #include <linux/irq.h> | |
23 | #include <linux/acpi.h> | |
fd1fea68 MK |
24 | #include <clocksource/hyperv_timer.h> |
25 | #include <asm/hyperv-tlfs.h> | |
26 | #include <asm/mshyperv.h> | |
27 | ||
28 | static struct clock_event_device __percpu *hv_clock_event; | |
bd00cd52 | 29 | static u64 hv_sched_clock_offset __ro_after_init; |
fd1fea68 MK |
30 | |
31 | /* | |
32 | * If false, we're using the old mechanism for stimer0 interrupts | |
33 | * where it sends a VMbus message when it expires. The old | |
34 | * mechanism is used when running on older versions of Hyper-V | |
35 | * that don't support Direct Mode. While Hyper-V provides | |
36 | * four stimer's per CPU, Linux uses only stimer0. | |
4df4cb9e MK |
37 | * |
38 | * Because Direct Mode does not require processing a VMbus | |
39 | * message, stimer interrupts can be enabled earlier in the | |
40 | * process of booting a CPU, and consistent with when timer | |
41 | * interrupts are enabled for other clocksource drivers. | |
42 | * However, for legacy versions of Hyper-V when Direct Mode | |
43 | * is not enabled, setting up stimer interrupts must be | |
44 | * delayed until VMbus is initialized and can process the | |
45 | * interrupt message. | |
fd1fea68 MK |
46 | */ |
47 | static bool direct_mode_enabled; | |
48 | ||
ec866be6 | 49 | static int stimer0_irq = -1; |
fd1fea68 | 50 | static int stimer0_message_sint; |
ec866be6 | 51 | static DEFINE_PER_CPU(long, stimer0_evt); |
fd1fea68 MK |
52 | |
/*
 * Common code for stimer0 interrupts coming via Direct Mode or
 * as a VMbus message. Runs in interrupt context on the CPU whose
 * stimer0 expired.
 */
void hv_stimer0_isr(void)
{
	struct clock_event_device *ce;

	/* Dispatch to the clockevent handler registered for this CPU */
	ce = this_cpu_ptr(hv_clock_event);
	ce->event_handler(ce);
}
EXPORT_SYMBOL_GPL(hv_stimer0_isr);
65 | ||
ec866be6 MK |
/*
 * stimer0 interrupt handler for architectures that support
 * per-cpu interrupts, which also implies Direct Mode.
 */
static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id)
{
	/* irq and dev_id are unused; all work is in the common ISR */
	hv_stimer0_isr();
	return IRQ_HANDLED;
}
75 | ||
fd1fea68 MK |
76 | static int hv_ce_set_next_event(unsigned long delta, |
77 | struct clock_event_device *evt) | |
78 | { | |
79 | u64 current_tick; | |
80 | ||
0af3e137 | 81 | current_tick = hv_read_reference_counter(); |
fd1fea68 | 82 | current_tick += delta; |
f3c5e63c | 83 | hv_set_register(HV_REGISTER_STIMER0_COUNT, current_tick); |
fd1fea68 MK |
84 | return 0; |
85 | } | |
86 | ||
/* Stop stimer0 on this CPU by clearing its count and config registers */
static int hv_ce_shutdown(struct clock_event_device *evt)
{
	hv_set_register(HV_REGISTER_STIMER0_COUNT, 0);
	hv_set_register(HV_REGISTER_STIMER0_CONFIG, 0);
	/* In Direct Mode with a per-cpu IRQ, also mask that IRQ */
	if (direct_mode_enabled && stimer0_irq >= 0)
		disable_percpu_irq(stimer0_irq);

	return 0;
}
96 | ||
/* Configure stimer0 on this CPU for one-shot operation */
static int hv_ce_set_oneshot(struct clock_event_device *evt)
{
	union hv_stimer_config timer_cfg;

	timer_cfg.as_uint64 = 0;
	timer_cfg.enable = 1;
	/* auto_enable: writing the count register arms the timer */
	timer_cfg.auto_enable = 1;
	if (direct_mode_enabled) {
		/*
		 * When it expires, the timer will directly interrupt
		 * on the specified hardware vector/IRQ.
		 */
		timer_cfg.direct_mode = 1;
		timer_cfg.apic_vector = HYPERV_STIMER0_VECTOR;
		if (stimer0_irq >= 0)
			enable_percpu_irq(stimer0_irq, IRQ_TYPE_NONE);
	} else {
		/*
		 * When it expires, the timer will generate a VMbus message,
		 * to be handled by the normal VMbus interrupt handler.
		 */
		timer_cfg.direct_mode = 0;
		timer_cfg.sintx = stimer0_message_sint;
	}
	hv_set_register(HV_REGISTER_STIMER0_CONFIG, timer_cfg.as_uint64);
	return 0;
}
124 | ||
/*
 * hv_stimer_init - Per-cpu initialization of the clockevent
 */
static int hv_stimer_init(unsigned int cpu)
{
	struct clock_event_device *ce;

	/* Nothing to do if global allocation hasn't happened (or failed) */
	if (!hv_clock_event)
		return 0;

	ce = per_cpu_ptr(hv_clock_event, cpu);
	ce->name = "Hyper-V clockevent";
	ce->features = CLOCK_EVT_FEAT_ONESHOT;
	ce->cpumask = cpumask_of(cpu);
	/* High rating so this device is preferred over other clockevents */
	ce->rating = 1000;
	ce->set_state_shutdown = hv_ce_shutdown;
	ce->set_state_oneshot = hv_ce_set_oneshot;
	ce->set_next_event = hv_ce_set_next_event;

	clockevents_config_and_register(ce,
					HV_CLOCK_HZ,
					HV_MIN_DELTA_TICKS,
					HV_MAX_MAX_DELTA_TICKS);
	return 0;
}
fd1fea68 MK |
150 | |
/*
 * hv_stimer_cleanup - Per-cpu cleanup of the clockevent
 */
int hv_stimer_cleanup(unsigned int cpu)
{
	struct clock_event_device *ce;

	/* Nothing to clean up if global allocation never happened */
	if (!hv_clock_event)
		return 0;

	/*
	 * In the legacy case where Direct Mode is not enabled
	 * (which can only be on x86/64), stimer cleanup happens
	 * relatively early in the CPU offlining process. We
	 * must unbind the stimer-based clockevent device so
	 * that the LAPIC timer can take over until clockevents
	 * are no longer needed in the offlining process. Note
	 * that clockevents_unbind_device() eventually calls
	 * hv_ce_shutdown().
	 *
	 * The unbind should not be done when Direct Mode is
	 * enabled because we may be on an architecture where
	 * there are no other clockevent devices to fallback to.
	 */
	ce = per_cpu_ptr(hv_clock_event, cpu);
	if (direct_mode_enabled)
		hv_ce_shutdown(ce);
	else
		clockevents_unbind_device(ce, cpu);

	return 0;
}
EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
184 | ||
ec866be6 MK |
185 | /* |
186 | * These placeholders are overridden by arch specific code on | |
187 | * architectures that need special setup of the stimer0 IRQ because | |
188 | * they don't support per-cpu IRQs (such as x86/x64). | |
189 | */ | |
190 | void __weak hv_setup_stimer0_handler(void (*handler)(void)) | |
191 | { | |
192 | }; | |
193 | ||
194 | void __weak hv_remove_stimer0_handler(void) | |
195 | { | |
196 | }; | |
197 | ||
198 | /* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */ | |
199 | static int hv_setup_stimer0_irq(void) | |
200 | { | |
201 | int ret; | |
202 | ||
203 | ret = acpi_register_gsi(NULL, HYPERV_STIMER0_VECTOR, | |
204 | ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH); | |
205 | if (ret < 0) { | |
206 | pr_err("Can't register Hyper-V stimer0 GSI. Error %d", ret); | |
207 | return ret; | |
208 | } | |
209 | stimer0_irq = ret; | |
210 | ||
211 | ret = request_percpu_irq(stimer0_irq, hv_stimer0_percpu_isr, | |
212 | "Hyper-V stimer0", &stimer0_evt); | |
213 | if (ret) { | |
214 | pr_err("Can't request Hyper-V stimer0 IRQ %d. Error %d", | |
215 | stimer0_irq, ret); | |
216 | acpi_unregister_gsi(stimer0_irq); | |
217 | stimer0_irq = -1; | |
218 | } | |
219 | return ret; | |
220 | } | |
221 | ||
/* Tear down whichever stimer0 interrupt mechanism was set up */
static void hv_remove_stimer0_irq(void)
{
	if (stimer0_irq == -1) {
		/* No per-cpu IRQ; an arch-specific handler was installed */
		hv_remove_stimer0_handler();
	} else {
		free_percpu_irq(stimer0_irq, &stimer0_evt);
		acpi_unregister_gsi(stimer0_irq);
		stimer0_irq = -1;
	}
}
232 | ||
fd1fea68 | 233 | /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */ |
ec866be6 | 234 | int hv_stimer_alloc(bool have_percpu_irqs) |
fd1fea68 | 235 | { |
ec866be6 | 236 | int ret; |
4df4cb9e MK |
237 | |
238 | /* | |
239 | * Synthetic timers are always available except on old versions of | |
240 | * Hyper-V on x86. In that case, return as error as Linux will use a | |
241 | * clockevent based on emulated LAPIC timer hardware. | |
242 | */ | |
243 | if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)) | |
244 | return -EINVAL; | |
fd1fea68 MK |
245 | |
246 | hv_clock_event = alloc_percpu(struct clock_event_device); | |
247 | if (!hv_clock_event) | |
248 | return -ENOMEM; | |
249 | ||
250 | direct_mode_enabled = ms_hyperv.misc_features & | |
251 | HV_STIMER_DIRECT_MODE_AVAILABLE; | |
ec866be6 MK |
252 | |
253 | /* | |
254 | * If Direct Mode isn't enabled, the remainder of the initialization | |
255 | * is done later by hv_stimer_legacy_init() | |
256 | */ | |
257 | if (!direct_mode_enabled) | |
258 | return 0; | |
259 | ||
260 | if (have_percpu_irqs) { | |
261 | ret = hv_setup_stimer0_irq(); | |
4df4cb9e | 262 | if (ret) |
ec866be6 MK |
263 | goto free_clock_event; |
264 | } else { | |
265 | hv_setup_stimer0_handler(hv_stimer0_isr); | |
266 | } | |
4df4cb9e | 267 | |
ec866be6 MK |
268 | /* |
269 | * Since we are in Direct Mode, stimer initialization | |
270 | * can be done now with a CPUHP value in the same range | |
271 | * as other clockevent devices. | |
272 | */ | |
273 | ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING, | |
274 | "clockevents/hyperv/stimer:starting", | |
275 | hv_stimer_init, hv_stimer_cleanup); | |
276 | if (ret < 0) { | |
277 | hv_remove_stimer0_irq(); | |
278 | goto free_clock_event; | |
fd1fea68 | 279 | } |
4df4cb9e | 280 | return ret; |
fd1fea68 | 281 | |
ec866be6 | 282 | free_clock_event: |
4df4cb9e MK |
283 | free_percpu(hv_clock_event); |
284 | hv_clock_event = NULL; | |
285 | return ret; | |
fd1fea68 MK |
286 | } |
287 | EXPORT_SYMBOL_GPL(hv_stimer_alloc); | |
288 | ||
4df4cb9e MK |
/*
 * hv_stimer_legacy_init -- Called from the VMbus driver to handle
 *	the case when Direct Mode is not enabled, and the stimer
 *	must be initialized late in the CPU onlining process.
 */
void hv_stimer_legacy_init(unsigned int cpu, int sint)
{
	/* Direct Mode initialization is done earlier via CPU hotplug */
	if (direct_mode_enabled)
		return;

	/*
	 * This function gets called by each vCPU, so setting the
	 * global stimer_message_sint value each time is conceptually
	 * not ideal, but the value passed in is always the same and
	 * it avoids introducing yet another interface into this
	 * clocksource driver just to set the sint in the legacy case.
	 */
	stimer0_message_sint = sint;
	(void)hv_stimer_init(cpu);
}
EXPORT_SYMBOL_GPL(hv_stimer_legacy_init);
311 | ||
312 | /* | |
313 | * hv_stimer_legacy_cleanup -- Called from the VMbus driver to | |
314 | * handle the case when Direct Mode is not enabled, and the | |
315 | * stimer must be cleaned up early in the CPU offlining | |
316 | * process. | |
317 | */ | |
318 | void hv_stimer_legacy_cleanup(unsigned int cpu) | |
319 | { | |
320 | if (direct_mode_enabled) | |
321 | return; | |
322 | (void)hv_stimer_cleanup(cpu); | |
323 | } | |
324 | EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup); | |
325 | ||
fd1fea68 MK |
326 | /* |
327 | * Do a global cleanup of clockevents for the cases of kexec and | |
328 | * vmbus exit | |
329 | */ | |
330 | void hv_stimer_global_cleanup(void) | |
331 | { | |
332 | int cpu; | |
fd1fea68 | 333 | |
4df4cb9e MK |
334 | /* |
335 | * hv_stime_legacy_cleanup() will stop the stimer if Direct | |
336 | * Mode is not enabled, and fallback to the LAPIC timer. | |
337 | */ | |
338 | for_each_present_cpu(cpu) { | |
339 | hv_stimer_legacy_cleanup(cpu); | |
fd1fea68 | 340 | } |
4df4cb9e | 341 | |
ec866be6 MK |
342 | if (!hv_clock_event) |
343 | return; | |
344 | ||
345 | if (direct_mode_enabled) { | |
346 | cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING); | |
347 | hv_remove_stimer0_irq(); | |
348 | stimer0_irq = -1; | |
349 | } | |
350 | free_percpu(hv_clock_event); | |
351 | hv_clock_event = NULL; | |
352 | ||
fd1fea68 MK |
353 | } |
354 | EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); | |
dd2cb348 MK |
355 | |
/*
 * Code and definitions for the Hyper-V clocksources. Two
 * clocksources are defined: one that reads the Hyper-V defined MSR, and
 * the other that uses the TSC reference page feature as defined in the
 * TLFS. The MSR version is for compatibility with old versions of
 * Hyper-V and 32-bit x86. The TSC reference page version is preferred.
 */

/*
 * Function pointer for reading the current Hyper-V reference time;
 * set to the TSC-page or MSR based reader during initialization.
 */
u64 (*hv_read_reference_counter)(void);
EXPORT_SYMBOL_GPL(hv_read_reference_counter);
dd2cb348 | 366 | |
ddc61bbc BF |
/*
 * Statically allocated TSC page, padded and aligned to PAGE_SIZE so
 * its page-aligned physical address can be programmed into the
 * reference TSC register.
 */
static union {
	struct ms_hyperv_tsc_page page;
	u8 reserved[PAGE_SIZE];
} tsc_pg __aligned(PAGE_SIZE);

/* Accessor so other kernel code can reference the TSC page */
struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
	return &tsc_pg.page;
}
EXPORT_SYMBOL_GPL(hv_get_tsc_page);
377 | ||
0af3e137 | 378 | static u64 notrace read_hv_clock_tsc(void) |
dd2cb348 | 379 | { |
ddc61bbc | 380 | u64 current_tick = hv_read_tsc_page(hv_get_tsc_page()); |
dd2cb348 MK |
381 | |
382 | if (current_tick == U64_MAX) | |
f3c5e63c | 383 | current_tick = hv_get_register(HV_REGISTER_TIME_REF_COUNT); |
dd2cb348 MK |
384 | |
385 | return current_tick; | |
386 | } | |
387 | ||
0af3e137 AP |
/* Clocksource framework read callback; the clocksource arg is unused */
static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg)
{
	return read_hv_clock_tsc();
}
392 | ||
1f3aed01 | 393 | static u64 notrace read_hv_sched_clock_tsc(void) |
dd2cb348 | 394 | { |
749da8ca YX |
395 | return (read_hv_clock_tsc() - hv_sched_clock_offset) * |
396 | (NSEC_PER_SEC / HV_CLOCK_HZ); | |
dd2cb348 MK |
397 | } |
398 | ||
1349401f DC |
/* Clocksource suspend callback: turn off the TSC page */
static void suspend_hv_clock_tsc(struct clocksource *arg)
{
	u64 tsc_msr;

	/* Disable the TSC page by clearing the enable bit (bit 0) */
	tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC);
	tsc_msr &= ~BIT_ULL(0);
	hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
}
408 | ||
409 | ||
/* Clocksource resume callback: turn the TSC page back on */
static void resume_hv_clock_tsc(struct clocksource *arg)
{
	phys_addr_t phys_addr = virt_to_phys(&tsc_pg);
	u64 tsc_msr;

	/*
	 * Re-enable the TSC page: preserve the low 12 bits of the
	 * register, then set the page's physical address (page aligned,
	 * so low 12 bits are zero) and the enable bit (bit 0).
	 */
	tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC);
	tsc_msr &= GENMASK_ULL(11, 0);
	tsc_msr |= BIT_ULL(0) | (u64)phys_addr;
	hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
}
421 | ||
e4ab4658 | 422 | #ifdef VDSO_CLOCKMODE_HVCLOCK |
eec399dd TG |
423 | static int hv_cs_enable(struct clocksource *cs) |
424 | { | |
e4ab4658 | 425 | vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); |
eec399dd TG |
426 | return 0; |
427 | } | |
e4ab4658 | 428 | #endif |
eec399dd | 429 | |
dd2cb348 MK |
/* The preferred clocksource: reads via the TSC reference page */
static struct clocksource hyperv_cs_tsc = {
	.name = "hyperv_clocksource_tsc_page",
	.rating = 500,
	.read = read_hv_clock_tsc_cs,
	.mask = CLOCKSOURCE_MASK(64),
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
	.suspend = suspend_hv_clock_tsc,
	.resume = resume_hv_clock_tsc,
#ifdef VDSO_CLOCKMODE_HVCLOCK
	.enable = hv_cs_enable,
	.vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK,
#else
	.vdso_clock_mode = VDSO_CLOCKMODE_NONE,
#endif
};
dd2cb348 | 445 | |
0af3e137 | 446 | static u64 notrace read_hv_clock_msr(void) |
dd2cb348 | 447 | { |
dd2cb348 MK |
448 | /* |
449 | * Read the partition counter to get the current tick count. This count | |
450 | * is set to 0 when the partition is created and is incremented in | |
451 | * 100 nanosecond units. | |
452 | */ | |
f3c5e63c | 453 | return hv_get_register(HV_REGISTER_TIME_REF_COUNT); |
dd2cb348 MK |
454 | } |
455 | ||
0af3e137 AP |
/* Clocksource framework read callback; the clocksource arg is unused */
static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg)
{
	return read_hv_clock_msr();
}
460 | ||
1f3aed01 | 461 | static u64 notrace read_hv_sched_clock_msr(void) |
dd2cb348 | 462 | { |
749da8ca YX |
463 | return (read_hv_clock_msr() - hv_sched_clock_offset) * |
464 | (NSEC_PER_SEC / HV_CLOCK_HZ); | |
dd2cb348 MK |
465 | } |
466 | ||
/* Fallback clocksource: reads via the reference count MSR */
static struct clocksource hyperv_cs_msr = {
	.name = "hyperv_clocksource_msr",
	.rating = 500,
	.read = read_hv_clock_msr_cs,
	.mask = CLOCKSOURCE_MASK(64),
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
474 | ||
eb3e1d37 MK |
/*
 * Reference to pv_ops must be inline so objtool
 * detection of noinstr violations can work correctly.
 *
 * Three cases: generic sched clock (non-x86), paravirt ops (x86/x64),
 * or neither, in which case registration is a no-op.
 */
#ifdef CONFIG_GENERIC_SCHED_CLOCK
static __always_inline void hv_setup_sched_clock(void *sched_clock)
{
	/*
	 * We're on an architecture with generic sched clock (not x86/x64).
	 * The Hyper-V sched clock read function returns nanoseconds, not
	 * the normal 100ns units of the Hyper-V synthetic clock.
	 */
	sched_clock_register(sched_clock, 64, NSEC_PER_SEC);
}
#elif defined CONFIG_PARAVIRT
static __always_inline void hv_setup_sched_clock(void *sched_clock)
{
	/* We're on x86/x64 *and* using PV ops */
	paravirt_set_sched_clock(sched_clock);
}
#else /* !CONFIG_GENERIC_SCHED_CLOCK && !CONFIG_PARAVIRT */
static __always_inline void hv_setup_sched_clock(void *sched_clock) {}
#endif /* CONFIG_GENERIC_SCHED_CLOCK */
dd2cb348 MK |
499 | static bool __init hv_init_tsc_clocksource(void) |
500 | { | |
501 | u64 tsc_msr; | |
502 | phys_addr_t phys_addr; | |
503 | ||
504 | if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) | |
505 | return false; | |
506 | ||
7d4163c8 WL |
507 | if (hv_root_partition) |
508 | return false; | |
509 | ||
4c78738e MK |
510 | /* |
511 | * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly | |
512 | * handles frequency and offset changes due to live migration, | |
513 | * pause/resume, and other VM management operations. So lower the | |
514 | * Hyper-V Reference TSC rating, causing the generic TSC to be used. | |
515 | * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference | |
516 | * TSC will be preferred over the virtualized ARM64 arch counter. | |
ec866be6 MK |
517 | * While the Hyper-V MSR clocksource won't be used since the |
518 | * Reference TSC clocksource is present, change its rating as | |
519 | * well for consistency. | |
4c78738e | 520 | */ |
ec866be6 | 521 | if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { |
4c78738e | 522 | hyperv_cs_tsc.rating = 250; |
ec866be6 MK |
523 | hyperv_cs_msr.rating = 250; |
524 | } | |
4c78738e | 525 | |
0af3e137 | 526 | hv_read_reference_counter = read_hv_clock_tsc; |
ddc61bbc | 527 | phys_addr = virt_to_phys(hv_get_tsc_page()); |
dd2cb348 MK |
528 | |
529 | /* | |
530 | * The Hyper-V TLFS specifies to preserve the value of reserved | |
531 | * bits in registers. So read the existing value, preserve the | |
532 | * low order 12 bits, and add in the guest physical address | |
533 | * (which already has at least the low 12 bits set to zero since | |
534 | * it is page aligned). Also set the "enable" bit, which is bit 0. | |
535 | */ | |
f3c5e63c | 536 | tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); |
dd2cb348 MK |
537 | tsc_msr &= GENMASK_ULL(11, 0); |
538 | tsc_msr = tsc_msr | 0x1 | (u64)phys_addr; | |
f3c5e63c | 539 | hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); |
dd2cb348 | 540 | |
dd2cb348 MK |
541 | clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); |
542 | ||
0af3e137 | 543 | hv_sched_clock_offset = hv_read_reference_counter(); |
bd00cd52 TL |
544 | hv_setup_sched_clock(read_hv_sched_clock_tsc); |
545 | ||
dd2cb348 MK |
546 | return true; |
547 | } | |
dd2cb348 MK |
548 | |
void __init hv_init_clocksource(void)
{
	/*
	 * Try to set up the TSC page clocksource. If it succeeds, we're
	 * done. Otherwise, set up the MSR clocksource.  At least one of
	 * these will always be available except on very old versions of
	 * Hyper-V on x86.  In that case we won't have a Hyper-V
	 * clocksource, but Linux will still run with a clocksource based
	 * on the emulated PIT or LAPIC timer.
	 */
	if (hv_init_tsc_clocksource())
		return;

	if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE))
		return;

	hv_read_reference_counter = read_hv_clock_msr;
	/* Register at 10 MHz: the reference time is in 100ns units */
	clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);

	/* Capture the current time as the sched_clock zero point */
	hv_sched_clock_offset = hv_read_reference_counter();
	hv_setup_sched_clock(read_hv_sched_clock_msr);
}
EXPORT_SYMBOL_GPL(hv_init_clocksource);