Commit | Line | Data |
---|---|---|
3b20eb23 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
3e7ee490 | 2 | /* |
3e7ee490 HJ |
3 | * Copyright (c) 2009, Microsoft Corporation. |
4 | * | |
3e7ee490 HJ |
5 | * Authors: |
6 | * Haiyang Zhang <haiyangz@microsoft.com> | |
7 | * Hank Janssen <hjanssen@microsoft.com> | |
3e7ee490 | 8 | */ |
0a46618d HJ |
9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
10 | ||
faff4406 | 11 | #include <linux/io.h> |
a0086dc5 GKH |
12 | #include <linux/kernel.h> |
13 | #include <linux/mm.h> | |
5a0e3ad6 | 14 | #include <linux/slab.h> |
b7c947f0 | 15 | #include <linux/vmalloc.h> |
46a97191 | 16 | #include <linux/hyperv.h> |
248e742a | 17 | #include <linux/random.h> |
4061ed9e | 18 | #include <linux/clockchips.h> |
b635ccc1 | 19 | #include <linux/delay.h> |
d608715d | 20 | #include <linux/interrupt.h> |
fd1fea68 | 21 | #include <clocksource/hyperv_timer.h> |
4061ed9e | 22 | #include <asm/mshyperv.h> |
0f2a6619 | 23 | #include "hyperv_vmbus.h" |
3e7ee490 | 24 | |
454f18a9 | 25 | /* The one and only */ |
a3cadf38 | 26 | struct hv_context hv_context; |
3e7ee490 | 27 | |
3e189519 | 28 | /* |
d44890c8 | 29 | * hv_init - Main initialization routine. |
0831ad04 GKH |
30 | * |
31 | * This routine must be called before any other routines in here are called | |
32 | */ | |
d44890c8 | 33 | int hv_init(void) |
3e7ee490 | 34 | { |
37cdd991 SH |
35 | hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context); |
36 | if (!hv_context.cpu_context) | |
37 | return -ENOMEM; | |
5433e003 | 38 | return 0; |
3e7ee490 HJ |
39 | } |
40 | ||
ca48739e MK |
41 | /* |
42 | * Functions for allocating and freeing memory with size and | |
43 | * alignment HV_HYP_PAGE_SIZE. These functions are needed because | |
44 | * the guest page size may not be the same as the Hyper-V page | |
45 | * size. We depend upon kmalloc() aligning power-of-two size | |
46 | * allocations to the allocation size boundary, so that the | |
47 | * allocated memory appears to Hyper-V as a page of the size | |
48 | * it expects. | |
49 | */ | |
50 | ||
51 | void *hv_alloc_hyperv_page(void) | |
52 | { | |
53 | BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); | |
54 | ||
55 | if (PAGE_SIZE == HV_HYP_PAGE_SIZE) | |
56 | return (void *)__get_free_page(GFP_KERNEL); | |
57 | else | |
58 | return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); | |
59 | } | |
60 | ||
61 | void *hv_alloc_hyperv_zeroed_page(void) | |
62 | { | |
63 | if (PAGE_SIZE == HV_HYP_PAGE_SIZE) | |
64 | return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | |
65 | else | |
66 | return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); | |
67 | } | |
68 | ||
69 | void hv_free_hyperv_page(unsigned long addr) | |
70 | { | |
71 | if (PAGE_SIZE == HV_HYP_PAGE_SIZE) | |
72 | free_page(addr); | |
73 | else | |
74 | kfree((void *)addr); | |
75 | } | |
76 | ||
3e189519 | 77 | /* |
d44890c8 | 78 | * hv_post_message - Post a message using the hypervisor message IPC. |
0831ad04 GKH |
79 | * |
80 | * This involves a hypercall. | |
81 | */ | |
415f0a02 | 82 | int hv_post_message(union hv_connection_id connection_id, |
b8dfb264 HZ |
83 | enum hv_message_type message_type, |
84 | void *payload, size_t payload_size) | |
3e7ee490 | 85 | { |
b8dfb264 | 86 | struct hv_input_post_message *aligned_msg; |
37cdd991 | 87 | struct hv_per_cpu_context *hv_cpu; |
a108393d | 88 | u64 status; |
3e7ee490 | 89 | |
b8dfb264 | 90 | if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) |
39594abc | 91 | return -EMSGSIZE; |
3e7ee490 | 92 | |
37cdd991 SH |
93 | hv_cpu = get_cpu_ptr(hv_context.cpu_context); |
94 | aligned_msg = hv_cpu->post_msg_page; | |
b8dfb264 | 95 | aligned_msg->connectionid = connection_id; |
b29ef354 | 96 | aligned_msg->reserved = 0; |
b8dfb264 HZ |
97 | aligned_msg->message_type = message_type; |
98 | aligned_msg->payload_size = payload_size; | |
99 | memcpy((void *)aligned_msg->payload, payload, payload_size); | |
3e7ee490 | 100 | |
20c89a55 TL |
101 | if (hv_isolation_type_snp()) |
102 | status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE, | |
103 | (void *)aligned_msg, NULL, | |
104 | sizeof(*aligned_msg)); | |
105 | else | |
106 | status = hv_do_hypercall(HVCALL_POST_MESSAGE, | |
107 | aligned_msg, NULL); | |
3e7ee490 | 108 | |
13b9abfc MK |
109 | /* Preemption must remain disabled until after the hypercall |
110 | * so some other thread can't get scheduled onto this cpu and | |
111 | * corrupt the per-cpu post_msg_page | |
112 | */ | |
113 | put_cpu_ptr(hv_cpu); | |
114 | ||
753ed9c9 | 115 | return hv_result(status); |
3e7ee490 HJ |
116 | } |
117 | ||
2608fb65 JW |
118 | int hv_synic_alloc(void) |
119 | { | |
2608fb65 | 120 | int cpu; |
f25a7ece MK |
121 | struct hv_per_cpu_context *hv_cpu; |
122 | ||
123 | /* | |
124 | * First, zero all per-cpu memory areas so hv_synic_free() can | |
125 | * detect what memory has been allocated and cleanup properly | |
126 | * after any failures. | |
127 | */ | |
128 | for_each_present_cpu(cpu) { | |
129 | hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); | |
130 | memset(hv_cpu, 0, sizeof(*hv_cpu)); | |
131 | } | |
2608fb65 | 132 | |
6396bb22 | 133 | hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask), |
597ff72f | 134 | GFP_KERNEL); |
9f01ec53 S |
135 | if (hv_context.hv_numa_map == NULL) { |
136 | pr_err("Unable to allocate NUMA map\n"); | |
137 | goto err; | |
138 | } | |
139 | ||
421b8f20 | 140 | for_each_present_cpu(cpu) { |
f25a7ece | 141 | hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); |
37cdd991 | 142 | |
37cdd991 SH |
143 | tasklet_init(&hv_cpu->msg_dpc, |
144 | vmbus_on_msg_dpc, (unsigned long) hv_cpu); | |
145 | ||
faff4406 TL |
146 | /* |
147 | * Synic message and event pages are allocated by paravisor. | |
148 | * Skip these pages allocation here. | |
149 | */ | |
150 | if (!hv_isolation_type_snp()) { | |
151 | hv_cpu->synic_message_page = | |
152 | (void *)get_zeroed_page(GFP_ATOMIC); | |
153 | if (hv_cpu->synic_message_page == NULL) { | |
154 | pr_err("Unable to allocate SYNIC message page\n"); | |
155 | goto err; | |
156 | } | |
157 | ||
158 | hv_cpu->synic_event_page = | |
159 | (void *)get_zeroed_page(GFP_ATOMIC); | |
160 | if (hv_cpu->synic_event_page == NULL) { | |
161 | pr_err("Unable to allocate SYNIC event page\n"); | |
162 | goto err; | |
163 | } | |
2608fb65 | 164 | } |
b29ef354 | 165 | |
37cdd991 SH |
166 | hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC); |
167 | if (hv_cpu->post_msg_page == NULL) { | |
b29ef354 S |
168 | pr_err("Unable to allocate post msg page\n"); |
169 | goto err; | |
170 | } | |
2608fb65 JW |
171 | } |
172 | ||
173 | return 0; | |
174 | err: | |
57208632 MK |
175 | /* |
176 | * Any memory allocations that succeeded will be freed when | |
177 | * the caller cleans up by calling hv_synic_free() | |
178 | */ | |
2608fb65 JW |
179 | return -ENOMEM; |
180 | } | |
181 | ||
2608fb65 JW |
182 | |
183 | void hv_synic_free(void) | |
184 | { | |
185 | int cpu; | |
186 | ||
37cdd991 SH |
187 | for_each_present_cpu(cpu) { |
188 | struct hv_per_cpu_context *hv_cpu | |
189 | = per_cpu_ptr(hv_context.cpu_context, cpu); | |
190 | ||
57208632 MK |
191 | free_page((unsigned long)hv_cpu->synic_event_page); |
192 | free_page((unsigned long)hv_cpu->synic_message_page); | |
193 | free_page((unsigned long)hv_cpu->post_msg_page); | |
37cdd991 SH |
194 | } |
195 | ||
9f01ec53 | 196 | kfree(hv_context.hv_numa_map); |
2608fb65 JW |
197 | } |
198 | ||
3e189519 | 199 | /* |
68cb8117 | 200 | * hv_synic_init - Initialize the Synthetic Interrupt Controller. |
0831ad04 GKH |
201 | * |
202 | * If it is already initialized by another entity (ie x2v shim), we need to | |
203 | * retrieve the initialized message and event pages. Otherwise, we create and | |
204 | * initialize the message and event pages. | |
205 | */ | |
dba61cda | 206 | void hv_synic_enable_regs(unsigned int cpu) |
3e7ee490 | 207 | { |
37cdd991 SH |
208 | struct hv_per_cpu_context *hv_cpu |
209 | = per_cpu_ptr(hv_context.cpu_context, cpu); | |
eacb1b4d GKH |
210 | union hv_synic_simp simp; |
211 | union hv_synic_siefp siefp; | |
b8dfb264 | 212 | union hv_synic_sint shared_sint; |
eacb1b4d | 213 | union hv_synic_scontrol sctrl; |
a73e6b7c | 214 | |
a73e6b7c | 215 | /* Setup the Synic's message page */ |
f3c5e63c | 216 | simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP); |
f6feebe0 | 217 | simp.simp_enabled = 1; |
faff4406 TL |
218 | |
219 | if (hv_isolation_type_snp()) { | |
220 | hv_cpu->synic_message_page | |
221 | = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT, | |
222 | HV_HYP_PAGE_SIZE, MEMREMAP_WB); | |
223 | if (!hv_cpu->synic_message_page) | |
224 | pr_err("Fail to map syinc message page.\n"); | |
225 | } else { | |
226 | simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page) | |
227 | >> HV_HYP_PAGE_SHIFT; | |
228 | } | |
3e7ee490 | 229 | |
f3c5e63c | 230 | hv_set_register(HV_REGISTER_SIMP, simp.as_uint64); |
3e7ee490 | 231 | |
a73e6b7c | 232 | /* Setup the Synic's event page */ |
f3c5e63c | 233 | siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); |
f6feebe0 | 234 | siefp.siefp_enabled = 1; |
faff4406 TL |
235 | |
236 | if (hv_isolation_type_snp()) { | |
237 | hv_cpu->synic_event_page = | |
238 | memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT, | |
239 | HV_HYP_PAGE_SIZE, MEMREMAP_WB); | |
240 | ||
241 | if (!hv_cpu->synic_event_page) | |
242 | pr_err("Fail to map syinc event page.\n"); | |
243 | } else { | |
244 | siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page) | |
245 | >> HV_HYP_PAGE_SHIFT; | |
246 | } | |
a73e6b7c | 247 | |
f3c5e63c | 248 | hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); |
0831ad04 | 249 | |
0831ad04 | 250 | /* Setup the shared SINT. */ |
d608715d MK |
251 | if (vmbus_irq != -1) |
252 | enable_percpu_irq(vmbus_irq, 0); | |
f3c5e63c MK |
253 | shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 + |
254 | VMBUS_MESSAGE_SINT); | |
3e7ee490 | 255 | |
d608715d | 256 | shared_sint.vector = vmbus_interrupt; |
b8dfb264 | 257 | shared_sint.masked = false; |
946f4b86 MK |
258 | |
259 | /* | |
260 | * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64), | |
261 | * it doesn't provide a recommendation flag and AEOI must be disabled. | |
262 | */ | |
263 | #ifdef HV_DEPRECATING_AEOI_RECOMMENDED | |
264 | shared_sint.auto_eoi = | |
265 | !(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED); | |
266 | #else | |
267 | shared_sint.auto_eoi = 0; | |
268 | #endif | |
f3c5e63c MK |
269 | hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, |
270 | shared_sint.as_uint64); | |
3e7ee490 | 271 | |
454f18a9 | 272 | /* Enable the global synic bit */ |
f3c5e63c | 273 | sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL); |
f6feebe0 | 274 | sctrl.enable = 1; |
3e7ee490 | 275 | |
f3c5e63c | 276 | hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); |
dba61cda DC |
277 | } |
278 | ||
279 | int hv_synic_init(unsigned int cpu) | |
280 | { | |
281 | hv_synic_enable_regs(cpu); | |
3e7ee490 | 282 | |
4df4cb9e | 283 | hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT); |
e086748c | 284 | |
fd1fea68 | 285 | return 0; |
e086748c VK |
286 | } |
287 | ||
3e189519 | 288 | /* |
d44890c8 | 289 | * hv_synic_cleanup - Cleanup routine for hv_synic_init(). |
0831ad04 | 290 | */ |
dba61cda | 291 | void hv_synic_disable_regs(unsigned int cpu) |
3e7ee490 | 292 | { |
faff4406 TL |
293 | struct hv_per_cpu_context *hv_cpu |
294 | = per_cpu_ptr(hv_context.cpu_context, cpu); | |
b8dfb264 | 295 | union hv_synic_sint shared_sint; |
eacb1b4d GKH |
296 | union hv_synic_simp simp; |
297 | union hv_synic_siefp siefp; | |
e72e7ac5 | 298 | union hv_synic_scontrol sctrl; |
dba61cda | 299 | |
f3c5e63c MK |
300 | shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 + |
301 | VMBUS_MESSAGE_SINT); | |
dba61cda DC |
302 | |
303 | shared_sint.masked = 1; | |
304 | ||
305 | /* Need to correctly cleanup in the case of SMP!!! */ | |
306 | /* Disable the interrupt */ | |
f3c5e63c MK |
307 | hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, |
308 | shared_sint.as_uint64); | |
dba61cda | 309 | |
f3c5e63c | 310 | simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP); |
faff4406 TL |
311 | /* |
312 | * In Isolation VM, sim and sief pages are allocated by | |
313 | * paravisor. These pages also will be used by kdump | |
314 | * kernel. So just reset enable bit here and keep page | |
315 | * addresses. | |
316 | */ | |
dba61cda | 317 | simp.simp_enabled = 0; |
faff4406 TL |
318 | if (hv_isolation_type_snp()) |
319 | memunmap(hv_cpu->synic_message_page); | |
320 | else | |
321 | simp.base_simp_gpa = 0; | |
dba61cda | 322 | |
f3c5e63c | 323 | hv_set_register(HV_REGISTER_SIMP, simp.as_uint64); |
dba61cda | 324 | |
f3c5e63c | 325 | siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); |
dba61cda | 326 | siefp.siefp_enabled = 0; |
faff4406 TL |
327 | |
328 | if (hv_isolation_type_snp()) | |
329 | memunmap(hv_cpu->synic_event_page); | |
330 | else | |
331 | siefp.base_siefp_gpa = 0; | |
dba61cda | 332 | |
f3c5e63c | 333 | hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); |
dba61cda DC |
334 | |
335 | /* Disable the global synic bit */ | |
f3c5e63c | 336 | sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL); |
dba61cda | 337 | sctrl.enable = 0; |
f3c5e63c | 338 | hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); |
d608715d MK |
339 | |
340 | if (vmbus_irq != -1) | |
341 | disable_percpu_irq(vmbus_irq); | |
dba61cda DC |
342 | } |
343 | ||
b635ccc1 APM |
344 | #define HV_MAX_TRIES 3 |
345 | /* | |
346 | * Scan the event flags page of 'this' CPU looking for any bit that is set. If we find one | |
347 | * bit set, then wait for a few milliseconds. Repeat these steps for a maximum of 3 times. | |
348 | * Return 'true', if there is still any set bit after this operation; 'false', otherwise. | |
349 | * | |
350 | * If a bit is set, that means there is a pending channel interrupt. The expectation is | |
351 | * that the normal interrupt handling mechanism will find and process the channel interrupt | |
352 | * "very soon", and in the process clear the bit. | |
353 | */ | |
354 | static bool hv_synic_event_pending(void) | |
355 | { | |
356 | struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context); | |
357 | union hv_synic_event_flags *event = | |
358 | (union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT; | |
359 | unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */ | |
360 | bool pending; | |
361 | u32 relid; | |
362 | int tries = 0; | |
363 | ||
364 | retry: | |
365 | pending = false; | |
366 | for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) { | |
367 | /* Special case - VMBus channel protocol messages */ | |
368 | if (relid == 0) | |
369 | continue; | |
370 | pending = true; | |
371 | break; | |
372 | } | |
373 | if (pending && tries++ < HV_MAX_TRIES) { | |
374 | usleep_range(10000, 20000); | |
375 | goto retry; | |
376 | } | |
377 | return pending; | |
378 | } | |
f3c5e63c | 379 | |
dba61cda DC |
380 | int hv_synic_cleanup(unsigned int cpu) |
381 | { | |
523b9408 VK |
382 | struct vmbus_channel *channel, *sc; |
383 | bool channel_found = false; | |
3e7ee490 | 384 | |
b635ccc1 APM |
385 | if (vmbus_connection.conn_state != CONNECTED) |
386 | goto always_cleanup; | |
387 | ||
8a857c55 APM |
388 | /* |
389 | * Hyper-V does not provide a way to change the connect CPU once | |
92e4dc8b CC |
390 | * it is set; we must prevent the connect CPU from going offline |
391 | * while the VM is running normally. But in the panic or kexec() | |
392 | * path where the vmbus is already disconnected, the CPU must be | |
393 | * allowed to shut down. | |
8a857c55 | 394 | */ |
b635ccc1 | 395 | if (cpu == VMBUS_CONNECT_CPU) |
8a857c55 APM |
396 | return -EBUSY; |
397 | ||
523b9408 VK |
398 | /* |
399 | * Search for channels which are bound to the CPU we're about to | |
d570aec0 APM |
400 | * cleanup. In case we find one and vmbus is still connected, we |
401 | * fail; this will effectively prevent CPU offlining. | |
402 | * | |
403 | * TODO: Re-bind the channels to different CPUs. | |
523b9408 VK |
404 | */ |
405 | mutex_lock(&vmbus_connection.channel_mutex); | |
406 | list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { | |
407 | if (channel->target_cpu == cpu) { | |
408 | channel_found = true; | |
409 | break; | |
410 | } | |
523b9408 VK |
411 | list_for_each_entry(sc, &channel->sc_list, sc_list) { |
412 | if (sc->target_cpu == cpu) { | |
413 | channel_found = true; | |
414 | break; | |
415 | } | |
416 | } | |
523b9408 VK |
417 | if (channel_found) |
418 | break; | |
419 | } | |
420 | mutex_unlock(&vmbus_connection.channel_mutex); | |
421 | ||
b635ccc1 APM |
422 | if (channel_found) |
423 | return -EBUSY; | |
424 | ||
425 | /* | |
426 | * channel_found == false means that any channels that were previously | |
427 | * assigned to the CPU have been reassigned elsewhere with a call of | |
428 | * vmbus_send_modifychannel(). Scan the event flags page looking for | |
429 | * bits that are set and waiting with a timeout for vmbus_chan_sched() | |
430 | * to process such bits. If bits are still set after this operation | |
431 | * and VMBus is connected, fail the CPU offlining operation. | |
432 | */ | |
433 | if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending()) | |
523b9408 VK |
434 | return -EBUSY; |
435 | ||
b635ccc1 | 436 | always_cleanup: |
4df4cb9e | 437 | hv_stimer_legacy_cleanup(cpu); |
e086748c | 438 | |
dba61cda | 439 | hv_synic_disable_regs(cpu); |
76d36ab7 VK |
440 | |
441 | return 0; | |
3e7ee490 | 442 | } |