Commit | Line | Data |
---|---|---|
3b20eb23 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
3e7ee490 | 2 | /* |
3e7ee490 HJ |
3 | * Copyright (c) 2009, Microsoft Corporation. |
4 | * | |
3e7ee490 HJ |
5 | * Authors: |
6 | * Haiyang Zhang <haiyangz@microsoft.com> | |
7 | * Hank Janssen <hjanssen@microsoft.com> | |
b0069f43 | 8 | * K. Y. Srinivasan <kys@microsoft.com> |
3e7ee490 | 9 | */ |
0a46618d HJ |
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
11 | ||
3e7ee490 HJ |
12 | #include <linux/init.h> |
13 | #include <linux/module.h> | |
14 | #include <linux/device.h> | |
3e7ee490 HJ |
15 | #include <linux/interrupt.h> |
16 | #include <linux/sysctl.h> | |
5a0e3ad6 | 17 | #include <linux/slab.h> |
b0069f43 | 18 | #include <linux/acpi.h> |
8b5d6d3b | 19 | #include <linux/completion.h> |
46a97191 | 20 | #include <linux/hyperv.h> |
b0209501 | 21 | #include <linux/kernel_stat.h> |
4061ed9e | 22 | #include <linux/clockchips.h> |
e513229b | 23 | #include <linux/cpu.h> |
68db0cf1 IM |
24 | #include <linux/sched/task_stack.h> |
25 | ||
1f48dcf1 | 26 | #include <linux/delay.h> |
96c1d058 | 27 | #include <linux/notifier.h> |
f39650de | 28 | #include <linux/panic_notifier.h> |
96c1d058 | 29 | #include <linux/ptrace.h> |
35464483 | 30 | #include <linux/screen_info.h> |
510f7aef | 31 | #include <linux/kdebug.h> |
6d146aef | 32 | #include <linux/efi.h> |
4b44f2d1 | 33 | #include <linux/random.h> |
f3a99e76 | 34 | #include <linux/kernel.h> |
63ecc6d2 | 35 | #include <linux/syscore_ops.h> |
743b237c | 36 | #include <linux/dma-map-ops.h> |
fd1fea68 | 37 | #include <clocksource/hyperv_timer.h> |
0f2a6619 | 38 | #include "hyperv_vmbus.h" |
3e7ee490 | 39 | |
fc76936d SH |
40 | struct vmbus_dynid { |
41 | struct list_head node; | |
42 | struct hv_vmbus_device_id id; | |
43 | }; | |
44 | ||
607c1a11 | 45 | static struct acpi_device *hv_acpi_dev; |
1168ac22 | 46 | |
71a6655d | 47 | static struct completion probe_event; |
98db4335 | 48 | |
76d36ab7 | 49 | static int hyperv_cpuhp_online; |
96c1d058 | 50 | |
81b18bce SM |
51 | static void *hv_panic_page; |
52 | ||
d608715d MK |
53 | static long __percpu *vmbus_evt; |
54 | ||
626b901f | 55 | /* Values parsed from ACPI DSDT */ |
d608715d | 56 | int vmbus_irq; |
626b901f MK |
57 | int vmbus_interrupt; |
58 | ||
040026df TL |
59 | /* |
60 | * Boolean to control whether to report panic messages over Hyper-V. | |
61 | * | |
b18e3589 | 62 | * It can be set via /proc/sys/kernel/hyperv_record_panic_msg |
040026df TL |
63 | */ |
64 | static int sysctl_record_panic_msg = 1; | |
65 | ||
66 | static int hyperv_report_reg(void) | |
67 | { | |
68 | return !sysctl_record_panic_msg || !hv_panic_page; | |
69 | } | |
70 | ||
510f7aef VK |
71 | static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, |
72 | void *args) | |
73 | { | |
74 | struct pt_regs *regs; | |
75 | ||
74347a99 | 76 | vmbus_initiate_unload(true); |
510f7aef | 77 | |
73f26e52 TL |
78 | /* |
79 | * Hyper-V should be notified only once about a panic. If we will be | |
9f8b577f APM |
80 | * doing hv_kmsg_dump() with kmsg data later, don't do the notification |
81 | * here. | |
73f26e52 TL |
82 | */ |
83 | if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE | |
040026df | 84 | && hyperv_report_reg()) { |
74347a99 | 85 | regs = current_pt_regs(); |
f3a99e76 | 86 | hyperv_report_panic(regs, val, false); |
74347a99 | 87 | } |
96c1d058 NM |
88 | return NOTIFY_DONE; |
89 | } | |
90 | ||
510f7aef VK |
91 | static int hyperv_die_event(struct notifier_block *nb, unsigned long val, |
92 | void *args) | |
93 | { | |
49971e6b | 94 | struct die_args *die = args; |
510f7aef VK |
95 | struct pt_regs *regs = die->regs; |
96 | ||
608a973b MK |
97 | /* Don't notify Hyper-V if the die event is other than oops */ |
98 | if (val != DIE_OOPS) | |
99 | return NOTIFY_DONE; | |
100 | ||
73f26e52 TL |
101 | /* |
102 | * Hyper-V should be notified only once about a panic. If we will be | |
9f8b577f APM |
103 | * doing hv_kmsg_dump() with kmsg data later, don't do the notification |
104 | * here. | |
73f26e52 | 105 | */ |
040026df | 106 | if (hyperv_report_reg()) |
f3a99e76 | 107 | hyperv_report_panic(regs, val, true); |
510f7aef VK |
108 | return NOTIFY_DONE; |
109 | } | |
110 | ||
111 | static struct notifier_block hyperv_die_block = { | |
112 | .notifier_call = hyperv_die_event, | |
113 | }; | |
96c1d058 NM |
114 | static struct notifier_block hyperv_panic_block = { |
115 | .notifier_call = hyperv_panic_event, | |
116 | }; | |
117 | ||
6d146aef JO |
118 | static const char *fb_mmio_name = "fb_range"; |
119 | static struct resource *fb_mmio; | |
e2e80841 | 120 | static struct resource *hyperv_mmio; |
8aea7f82 | 121 | static DEFINE_MUTEX(hyperv_mmio_lock); |
98db4335 | 122 | |
cf6a2eac S |
123 | static int vmbus_exists(void) |
124 | { | |
125 | if (hv_acpi_dev == NULL) | |
126 | return -ENODEV; | |
127 | ||
128 | return 0; | |
129 | } | |
130 | ||
c2e5df61 | 131 | static u8 channel_monitor_group(const struct vmbus_channel *channel) |
76c52bbe GKH |
132 | { |
133 | return (u8)channel->offermsg.monitorid / 32; | |
134 | } | |
135 | ||
c2e5df61 | 136 | static u8 channel_monitor_offset(const struct vmbus_channel *channel) |
76c52bbe GKH |
137 | { |
138 | return (u8)channel->offermsg.monitorid % 32; | |
139 | } | |
140 | ||
c2e5df61 SH |
141 | static u32 channel_pending(const struct vmbus_channel *channel, |
142 | const struct hv_monitor_page *monitor_page) | |
76c52bbe GKH |
143 | { |
144 | u8 monitor_group = channel_monitor_group(channel); | |
c2e5df61 | 145 | |
76c52bbe GKH |
146 | return monitor_page->trigger_group[monitor_group].pending; |
147 | } | |
148 | ||
c2e5df61 SH |
149 | static u32 channel_latency(const struct vmbus_channel *channel, |
150 | const struct hv_monitor_page *monitor_page) | |
1cee272b GKH |
151 | { |
152 | u8 monitor_group = channel_monitor_group(channel); | |
153 | u8 monitor_offset = channel_monitor_offset(channel); | |
c2e5df61 | 154 | |
1cee272b GKH |
155 | return monitor_page->latency[monitor_group][monitor_offset]; |
156 | } | |
157 | ||
4947c745 GKH |
158 | static u32 channel_conn_id(struct vmbus_channel *channel, |
159 | struct hv_monitor_page *monitor_page) | |
160 | { | |
161 | u8 monitor_group = channel_monitor_group(channel); | |
162 | u8 monitor_offset = channel_monitor_offset(channel); | |
e4f2212e | 163 | |
4947c745 GKH |
164 | return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id; |
165 | } | |
166 | ||
03f3a910 GKH |
167 | static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr, |
168 | char *buf) | |
169 | { | |
170 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
171 | ||
172 | if (!hv_dev->channel) | |
173 | return -ENODEV; | |
174 | return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid); | |
175 | } | |
176 | static DEVICE_ATTR_RO(id); | |
177 | ||
a8fb5f3d GKH |
178 | static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr, |
179 | char *buf) | |
180 | { | |
181 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
182 | ||
183 | if (!hv_dev->channel) | |
184 | return -ENODEV; | |
185 | return sprintf(buf, "%d\n", hv_dev->channel->state); | |
186 | } | |
187 | static DEVICE_ATTR_RO(state); | |
188 | ||
5ffd00e2 GKH |
189 | static ssize_t monitor_id_show(struct device *dev, |
190 | struct device_attribute *dev_attr, char *buf) | |
191 | { | |
192 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
193 | ||
194 | if (!hv_dev->channel) | |
195 | return -ENODEV; | |
196 | return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid); | |
197 | } | |
198 | static DEVICE_ATTR_RO(monitor_id); | |
199 | ||
68234c04 GKH |
200 | static ssize_t class_id_show(struct device *dev, |
201 | struct device_attribute *dev_attr, char *buf) | |
202 | { | |
203 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
204 | ||
205 | if (!hv_dev->channel) | |
206 | return -ENODEV; | |
207 | return sprintf(buf, "{%pUl}\n", | |
458c4475 | 208 | &hv_dev->channel->offermsg.offer.if_type); |
68234c04 GKH |
209 | } |
210 | static DEVICE_ATTR_RO(class_id); | |
211 | ||
7c55e1d0 GKH |
212 | static ssize_t device_id_show(struct device *dev, |
213 | struct device_attribute *dev_attr, char *buf) | |
214 | { | |
215 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
216 | ||
217 | if (!hv_dev->channel) | |
218 | return -ENODEV; | |
219 | return sprintf(buf, "{%pUl}\n", | |
458c4475 | 220 | &hv_dev->channel->offermsg.offer.if_instance); |
7c55e1d0 GKH |
221 | } |
222 | static DEVICE_ATTR_RO(device_id); | |
223 | ||
647fa371 GKH |
224 | static ssize_t modalias_show(struct device *dev, |
225 | struct device_attribute *dev_attr, char *buf) | |
226 | { | |
227 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
647fa371 | 228 | |
0027e3fd | 229 | return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type); |
647fa371 GKH |
230 | } |
231 | static DEVICE_ATTR_RO(modalias); | |
232 | ||
#ifdef CONFIG_NUMA
/*
 * sysfs "numa_node": the node of the channel's target CPU.
 * Returns -ENODEV when the device has no channel.
 */
static ssize_t numa_node_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;

	/* sysfs_emit() bounds the output to the sysfs page, unlike sprintf() */
	return sysfs_emit(buf, "%d\n",
			  cpu_to_node(hv_dev->channel->target_cpu));
}
static DEVICE_ATTR_RO(numa_node);
#endif
246 | ||
76c52bbe GKH |
247 | static ssize_t server_monitor_pending_show(struct device *dev, |
248 | struct device_attribute *dev_attr, | |
249 | char *buf) | |
250 | { | |
251 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
252 | ||
253 | if (!hv_dev->channel) | |
254 | return -ENODEV; | |
255 | return sprintf(buf, "%d\n", | |
256 | channel_pending(hv_dev->channel, | |
fd8e3c35 | 257 | vmbus_connection.monitor_pages[0])); |
76c52bbe GKH |
258 | } |
259 | static DEVICE_ATTR_RO(server_monitor_pending); | |
260 | ||
261 | static ssize_t client_monitor_pending_show(struct device *dev, | |
262 | struct device_attribute *dev_attr, | |
263 | char *buf) | |
264 | { | |
265 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
266 | ||
267 | if (!hv_dev->channel) | |
268 | return -ENODEV; | |
269 | return sprintf(buf, "%d\n", | |
270 | channel_pending(hv_dev->channel, | |
271 | vmbus_connection.monitor_pages[1])); | |
272 | } | |
273 | static DEVICE_ATTR_RO(client_monitor_pending); | |
68234c04 | 274 | |
1cee272b GKH |
275 | static ssize_t server_monitor_latency_show(struct device *dev, |
276 | struct device_attribute *dev_attr, | |
277 | char *buf) | |
278 | { | |
279 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
280 | ||
281 | if (!hv_dev->channel) | |
282 | return -ENODEV; | |
283 | return sprintf(buf, "%d\n", | |
284 | channel_latency(hv_dev->channel, | |
285 | vmbus_connection.monitor_pages[0])); | |
286 | } | |
287 | static DEVICE_ATTR_RO(server_monitor_latency); | |
288 | ||
289 | static ssize_t client_monitor_latency_show(struct device *dev, | |
290 | struct device_attribute *dev_attr, | |
291 | char *buf) | |
292 | { | |
293 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
294 | ||
295 | if (!hv_dev->channel) | |
296 | return -ENODEV; | |
297 | return sprintf(buf, "%d\n", | |
298 | channel_latency(hv_dev->channel, | |
299 | vmbus_connection.monitor_pages[1])); | |
300 | } | |
301 | static DEVICE_ATTR_RO(client_monitor_latency); | |
302 | ||
4947c745 GKH |
303 | static ssize_t server_monitor_conn_id_show(struct device *dev, |
304 | struct device_attribute *dev_attr, | |
305 | char *buf) | |
306 | { | |
307 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
308 | ||
309 | if (!hv_dev->channel) | |
310 | return -ENODEV; | |
311 | return sprintf(buf, "%d\n", | |
312 | channel_conn_id(hv_dev->channel, | |
313 | vmbus_connection.monitor_pages[0])); | |
314 | } | |
315 | static DEVICE_ATTR_RO(server_monitor_conn_id); | |
316 | ||
317 | static ssize_t client_monitor_conn_id_show(struct device *dev, | |
318 | struct device_attribute *dev_attr, | |
319 | char *buf) | |
320 | { | |
321 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
322 | ||
323 | if (!hv_dev->channel) | |
324 | return -ENODEV; | |
325 | return sprintf(buf, "%d\n", | |
326 | channel_conn_id(hv_dev->channel, | |
327 | vmbus_connection.monitor_pages[1])); | |
328 | } | |
329 | static DEVICE_ATTR_RO(client_monitor_conn_id); | |
330 | ||
98f4c651 GKH |
331 | static ssize_t out_intr_mask_show(struct device *dev, |
332 | struct device_attribute *dev_attr, char *buf) | |
333 | { | |
334 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
335 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 336 | int ret; |
98f4c651 GKH |
337 | |
338 | if (!hv_dev->channel) | |
339 | return -ENODEV; | |
ba50bf1c DC |
340 | |
341 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
342 | &outbound); | |
343 | if (ret < 0) | |
344 | return ret; | |
345 | ||
98f4c651 GKH |
346 | return sprintf(buf, "%d\n", outbound.current_interrupt_mask); |
347 | } | |
348 | static DEVICE_ATTR_RO(out_intr_mask); | |
349 | ||
350 | static ssize_t out_read_index_show(struct device *dev, | |
351 | struct device_attribute *dev_attr, char *buf) | |
352 | { | |
353 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
354 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 355 | int ret; |
98f4c651 GKH |
356 | |
357 | if (!hv_dev->channel) | |
358 | return -ENODEV; | |
ba50bf1c DC |
359 | |
360 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
361 | &outbound); | |
362 | if (ret < 0) | |
363 | return ret; | |
98f4c651 GKH |
364 | return sprintf(buf, "%d\n", outbound.current_read_index); |
365 | } | |
366 | static DEVICE_ATTR_RO(out_read_index); | |
367 | ||
368 | static ssize_t out_write_index_show(struct device *dev, | |
369 | struct device_attribute *dev_attr, | |
370 | char *buf) | |
371 | { | |
372 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
373 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 374 | int ret; |
98f4c651 GKH |
375 | |
376 | if (!hv_dev->channel) | |
377 | return -ENODEV; | |
ba50bf1c DC |
378 | |
379 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
380 | &outbound); | |
381 | if (ret < 0) | |
382 | return ret; | |
98f4c651 GKH |
383 | return sprintf(buf, "%d\n", outbound.current_write_index); |
384 | } | |
385 | static DEVICE_ATTR_RO(out_write_index); | |
386 | ||
387 | static ssize_t out_read_bytes_avail_show(struct device *dev, | |
388 | struct device_attribute *dev_attr, | |
389 | char *buf) | |
390 | { | |
391 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
392 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 393 | int ret; |
98f4c651 GKH |
394 | |
395 | if (!hv_dev->channel) | |
396 | return -ENODEV; | |
ba50bf1c DC |
397 | |
398 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
399 | &outbound); | |
400 | if (ret < 0) | |
401 | return ret; | |
98f4c651 GKH |
402 | return sprintf(buf, "%d\n", outbound.bytes_avail_toread); |
403 | } | |
404 | static DEVICE_ATTR_RO(out_read_bytes_avail); | |
405 | ||
406 | static ssize_t out_write_bytes_avail_show(struct device *dev, | |
407 | struct device_attribute *dev_attr, | |
408 | char *buf) | |
409 | { | |
410 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
411 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 412 | int ret; |
98f4c651 GKH |
413 | |
414 | if (!hv_dev->channel) | |
415 | return -ENODEV; | |
ba50bf1c DC |
416 | |
417 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
418 | &outbound); | |
419 | if (ret < 0) | |
420 | return ret; | |
98f4c651 GKH |
421 | return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); |
422 | } | |
423 | static DEVICE_ATTR_RO(out_write_bytes_avail); | |
424 | ||
425 | static ssize_t in_intr_mask_show(struct device *dev, | |
426 | struct device_attribute *dev_attr, char *buf) | |
427 | { | |
428 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
429 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 430 | int ret; |
98f4c651 GKH |
431 | |
432 | if (!hv_dev->channel) | |
433 | return -ENODEV; | |
ba50bf1c DC |
434 | |
435 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
436 | if (ret < 0) | |
437 | return ret; | |
438 | ||
98f4c651 GKH |
439 | return sprintf(buf, "%d\n", inbound.current_interrupt_mask); |
440 | } | |
441 | static DEVICE_ATTR_RO(in_intr_mask); | |
442 | ||
443 | static ssize_t in_read_index_show(struct device *dev, | |
444 | struct device_attribute *dev_attr, char *buf) | |
445 | { | |
446 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
447 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 448 | int ret; |
98f4c651 GKH |
449 | |
450 | if (!hv_dev->channel) | |
451 | return -ENODEV; | |
ba50bf1c DC |
452 | |
453 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
454 | if (ret < 0) | |
455 | return ret; | |
456 | ||
98f4c651 GKH |
457 | return sprintf(buf, "%d\n", inbound.current_read_index); |
458 | } | |
459 | static DEVICE_ATTR_RO(in_read_index); | |
460 | ||
461 | static ssize_t in_write_index_show(struct device *dev, | |
462 | struct device_attribute *dev_attr, char *buf) | |
463 | { | |
464 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
465 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 466 | int ret; |
98f4c651 GKH |
467 | |
468 | if (!hv_dev->channel) | |
469 | return -ENODEV; | |
ba50bf1c DC |
470 | |
471 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
472 | if (ret < 0) | |
473 | return ret; | |
474 | ||
98f4c651 GKH |
475 | return sprintf(buf, "%d\n", inbound.current_write_index); |
476 | } | |
477 | static DEVICE_ATTR_RO(in_write_index); | |
478 | ||
479 | static ssize_t in_read_bytes_avail_show(struct device *dev, | |
480 | struct device_attribute *dev_attr, | |
481 | char *buf) | |
482 | { | |
483 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
484 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 485 | int ret; |
98f4c651 GKH |
486 | |
487 | if (!hv_dev->channel) | |
488 | return -ENODEV; | |
ba50bf1c DC |
489 | |
490 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
491 | if (ret < 0) | |
492 | return ret; | |
493 | ||
98f4c651 GKH |
494 | return sprintf(buf, "%d\n", inbound.bytes_avail_toread); |
495 | } | |
496 | static DEVICE_ATTR_RO(in_read_bytes_avail); | |
497 | ||
498 | static ssize_t in_write_bytes_avail_show(struct device *dev, | |
499 | struct device_attribute *dev_attr, | |
500 | char *buf) | |
501 | { | |
502 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
503 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 504 | int ret; |
98f4c651 GKH |
505 | |
506 | if (!hv_dev->channel) | |
507 | return -ENODEV; | |
ba50bf1c DC |
508 | |
509 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
510 | if (ret < 0) | |
511 | return ret; | |
512 | ||
98f4c651 GKH |
513 | return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); |
514 | } | |
515 | static DEVICE_ATTR_RO(in_write_bytes_avail); | |
516 | ||
042ab031 DC |
517 | static ssize_t channel_vp_mapping_show(struct device *dev, |
518 | struct device_attribute *dev_attr, | |
519 | char *buf) | |
520 | { | |
521 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
522 | struct vmbus_channel *channel = hv_dev->channel, *cur_sc; | |
042ab031 DC |
523 | int buf_size = PAGE_SIZE, n_written, tot_written; |
524 | struct list_head *cur; | |
525 | ||
526 | if (!channel) | |
527 | return -ENODEV; | |
528 | ||
3eb0ac86 APM |
529 | mutex_lock(&vmbus_connection.channel_mutex); |
530 | ||
042ab031 DC |
531 | tot_written = snprintf(buf, buf_size, "%u:%u\n", |
532 | channel->offermsg.child_relid, channel->target_cpu); | |
533 | ||
042ab031 DC |
534 | list_for_each(cur, &channel->sc_list) { |
535 | if (tot_written >= buf_size - 1) | |
536 | break; | |
537 | ||
538 | cur_sc = list_entry(cur, struct vmbus_channel, sc_list); | |
539 | n_written = scnprintf(buf + tot_written, | |
540 | buf_size - tot_written, | |
541 | "%u:%u\n", | |
542 | cur_sc->offermsg.child_relid, | |
543 | cur_sc->target_cpu); | |
544 | tot_written += n_written; | |
545 | } | |
546 | ||
3eb0ac86 | 547 | mutex_unlock(&vmbus_connection.channel_mutex); |
042ab031 DC |
548 | |
549 | return tot_written; | |
550 | } | |
551 | static DEVICE_ATTR_RO(channel_vp_mapping); | |
552 | ||
7047f17d S |
553 | static ssize_t vendor_show(struct device *dev, |
554 | struct device_attribute *dev_attr, | |
555 | char *buf) | |
556 | { | |
557 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
e4f2212e | 558 | |
7047f17d S |
559 | return sprintf(buf, "0x%x\n", hv_dev->vendor_id); |
560 | } | |
561 | static DEVICE_ATTR_RO(vendor); | |
562 | ||
563 | static ssize_t device_show(struct device *dev, | |
564 | struct device_attribute *dev_attr, | |
565 | char *buf) | |
566 | { | |
567 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
e4f2212e | 568 | |
7047f17d S |
569 | return sprintf(buf, "0x%x\n", hv_dev->device_id); |
570 | } | |
571 | static DEVICE_ATTR_RO(device); | |
572 | ||
d765edbb SH |
573 | static ssize_t driver_override_store(struct device *dev, |
574 | struct device_attribute *attr, | |
575 | const char *buf, size_t count) | |
576 | { | |
577 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
578 | char *driver_override, *old, *cp; | |
579 | ||
580 | /* We need to keep extra room for a newline */ | |
581 | if (count >= (PAGE_SIZE - 1)) | |
582 | return -EINVAL; | |
583 | ||
584 | driver_override = kstrndup(buf, count, GFP_KERNEL); | |
585 | if (!driver_override) | |
586 | return -ENOMEM; | |
587 | ||
588 | cp = strchr(driver_override, '\n'); | |
589 | if (cp) | |
590 | *cp = '\0'; | |
591 | ||
592 | device_lock(dev); | |
593 | old = hv_dev->driver_override; | |
594 | if (strlen(driver_override)) { | |
595 | hv_dev->driver_override = driver_override; | |
596 | } else { | |
597 | kfree(driver_override); | |
598 | hv_dev->driver_override = NULL; | |
599 | } | |
600 | device_unlock(dev); | |
601 | ||
602 | kfree(old); | |
603 | ||
604 | return count; | |
605 | } | |
606 | ||
607 | static ssize_t driver_override_show(struct device *dev, | |
608 | struct device_attribute *attr, char *buf) | |
609 | { | |
610 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
611 | ssize_t len; | |
612 | ||
613 | device_lock(dev); | |
614 | len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override); | |
615 | device_unlock(dev); | |
616 | ||
617 | return len; | |
618 | } | |
619 | static DEVICE_ATTR_RW(driver_override); | |
620 | ||
98f4c651 | 621 | /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */ |
fc76936d | 622 | static struct attribute *vmbus_dev_attrs[] = { |
03f3a910 | 623 | &dev_attr_id.attr, |
a8fb5f3d | 624 | &dev_attr_state.attr, |
5ffd00e2 | 625 | &dev_attr_monitor_id.attr, |
68234c04 | 626 | &dev_attr_class_id.attr, |
7c55e1d0 | 627 | &dev_attr_device_id.attr, |
647fa371 | 628 | &dev_attr_modalias.attr, |
7ceb1c37 SH |
629 | #ifdef CONFIG_NUMA |
630 | &dev_attr_numa_node.attr, | |
631 | #endif | |
76c52bbe GKH |
632 | &dev_attr_server_monitor_pending.attr, |
633 | &dev_attr_client_monitor_pending.attr, | |
1cee272b GKH |
634 | &dev_attr_server_monitor_latency.attr, |
635 | &dev_attr_client_monitor_latency.attr, | |
4947c745 GKH |
636 | &dev_attr_server_monitor_conn_id.attr, |
637 | &dev_attr_client_monitor_conn_id.attr, | |
98f4c651 GKH |
638 | &dev_attr_out_intr_mask.attr, |
639 | &dev_attr_out_read_index.attr, | |
640 | &dev_attr_out_write_index.attr, | |
641 | &dev_attr_out_read_bytes_avail.attr, | |
642 | &dev_attr_out_write_bytes_avail.attr, | |
643 | &dev_attr_in_intr_mask.attr, | |
644 | &dev_attr_in_read_index.attr, | |
645 | &dev_attr_in_write_index.attr, | |
646 | &dev_attr_in_read_bytes_avail.attr, | |
647 | &dev_attr_in_write_bytes_avail.attr, | |
042ab031 | 648 | &dev_attr_channel_vp_mapping.attr, |
7047f17d S |
649 | &dev_attr_vendor.attr, |
650 | &dev_attr_device.attr, | |
d765edbb | 651 | &dev_attr_driver_override.attr, |
03f3a910 GKH |
652 | NULL, |
653 | }; | |
46fc1548 KB |
654 | |
655 | /* | |
656 | * Device-level attribute_group callback function. Returns the permission for | |
657 | * each attribute, and returns 0 if an attribute is not visible. | |
658 | */ | |
659 | static umode_t vmbus_dev_attr_is_visible(struct kobject *kobj, | |
660 | struct attribute *attr, int idx) | |
661 | { | |
662 | struct device *dev = kobj_to_dev(kobj); | |
663 | const struct hv_device *hv_dev = device_to_hv_device(dev); | |
664 | ||
665 | /* Hide the monitor attributes if the monitor mechanism is not used. */ | |
666 | if (!hv_dev->channel->offermsg.monitor_allocated && | |
667 | (attr == &dev_attr_monitor_id.attr || | |
668 | attr == &dev_attr_server_monitor_pending.attr || | |
669 | attr == &dev_attr_client_monitor_pending.attr || | |
670 | attr == &dev_attr_server_monitor_latency.attr || | |
671 | attr == &dev_attr_client_monitor_latency.attr || | |
672 | attr == &dev_attr_server_monitor_conn_id.attr || | |
673 | attr == &dev_attr_client_monitor_conn_id.attr)) | |
674 | return 0; | |
675 | ||
676 | return attr->mode; | |
677 | } | |
678 | ||
679 | static const struct attribute_group vmbus_dev_group = { | |
680 | .attrs = vmbus_dev_attrs, | |
681 | .is_visible = vmbus_dev_attr_is_visible | |
682 | }; | |
683 | __ATTRIBUTE_GROUPS(vmbus_dev); | |
03f3a910 | 684 | |
c068e3f4 DC |
685 | /* Set up the attribute for /sys/bus/vmbus/hibernation */ |
686 | static ssize_t hibernation_show(struct bus_type *bus, char *buf) | |
687 | { | |
688 | return sprintf(buf, "%d\n", !!hv_is_hibernation_supported()); | |
689 | } | |
690 | ||
691 | static BUS_ATTR_RO(hibernation); | |
692 | ||
693 | static struct attribute *vmbus_bus_attrs[] = { | |
694 | &bus_attr_hibernation.attr, | |
695 | NULL, | |
696 | }; | |
697 | static const struct attribute_group vmbus_bus_group = { | |
698 | .attrs = vmbus_bus_attrs, | |
699 | }; | |
700 | __ATTRIBUTE_GROUPS(vmbus_bus); | |
701 | ||
adde2487 S |
702 | /* |
703 | * vmbus_uevent - add uevent for our device | |
704 | * | |
705 | * This routine is invoked when a device is added or removed on the vmbus to | |
706 | * generate a uevent to udev in the userspace. The udev will then look at its | |
707 | * rule and the uevent generated here to load the appropriate driver | |
0ddda660 S |
708 | * |
709 | * The alias string will be of the form vmbus:guid where guid is the string | |
710 | * representation of the device guid (each byte of the guid will be | |
711 | * represented with two hex characters. | |
adde2487 S |
712 | */ |
713 | static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env) | |
714 | { | |
715 | struct hv_device *dev = device_to_hv_device(device); | |
0027e3fd | 716 | const char *format = "MODALIAS=vmbus:%*phN"; |
0ddda660 | 717 | |
0027e3fd | 718 | return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type); |
adde2487 S |
719 | } |
720 | ||
d765edbb | 721 | static const struct hv_vmbus_device_id * |
593db803 | 722 | hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid) |
d765edbb SH |
723 | { |
724 | if (id == NULL) | |
725 | return NULL; /* empty device table */ | |
726 | ||
593db803 AS |
727 | for (; !guid_is_null(&id->guid); id++) |
728 | if (guid_equal(&id->guid, guid)) | |
d765edbb SH |
729 | return id; |
730 | ||
731 | return NULL; | |
732 | } | |
733 | ||
734 | static const struct hv_vmbus_device_id * | |
593db803 | 735 | hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid) |
3037a7b6 | 736 | { |
fc76936d SH |
737 | const struct hv_vmbus_device_id *id = NULL; |
738 | struct vmbus_dynid *dynid; | |
739 | ||
fc76936d SH |
740 | spin_lock(&drv->dynids.lock); |
741 | list_for_each_entry(dynid, &drv->dynids.list, node) { | |
593db803 | 742 | if (guid_equal(&dynid->id.guid, guid)) { |
fc76936d SH |
743 | id = &dynid->id; |
744 | break; | |
745 | } | |
746 | } | |
747 | spin_unlock(&drv->dynids.lock); | |
748 | ||
d765edbb SH |
749 | return id; |
750 | } | |
fc76936d | 751 | |
593db803 | 752 | static const struct hv_vmbus_device_id vmbus_device_null; |
fc76936d | 753 | |
d765edbb SH |
754 | /* |
755 | * Return a matching hv_vmbus_device_id pointer. | |
756 | * If there is no match, return NULL. | |
757 | */ | |
758 | static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv, | |
759 | struct hv_device *dev) | |
760 | { | |
593db803 | 761 | const guid_t *guid = &dev->dev_type; |
d765edbb | 762 | const struct hv_vmbus_device_id *id; |
3037a7b6 | 763 | |
d765edbb SH |
764 | /* When driver_override is set, only bind to the matching driver */ |
765 | if (dev->driver_override && strcmp(dev->driver_override, drv->name)) | |
766 | return NULL; | |
767 | ||
768 | /* Look at the dynamic ids first, before the static ones */ | |
769 | id = hv_vmbus_dynid_match(drv, guid); | |
770 | if (!id) | |
771 | id = hv_vmbus_dev_match(drv->id_table, guid); | |
772 | ||
773 | /* driver_override will always match, send a dummy id */ | |
774 | if (!id && dev->driver_override) | |
775 | id = &vmbus_device_null; | |
776 | ||
777 | return id; | |
3037a7b6 S |
778 | } |
779 | ||
fc76936d | 780 | /* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */ |
593db803 | 781 | static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid) |
fc76936d SH |
782 | { |
783 | struct vmbus_dynid *dynid; | |
784 | ||
785 | dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); | |
786 | if (!dynid) | |
787 | return -ENOMEM; | |
788 | ||
789 | dynid->id.guid = *guid; | |
790 | ||
791 | spin_lock(&drv->dynids.lock); | |
792 | list_add_tail(&dynid->node, &drv->dynids.list); | |
793 | spin_unlock(&drv->dynids.lock); | |
794 | ||
795 | return driver_attach(&drv->driver); | |
796 | } | |
797 | ||
798 | static void vmbus_free_dynids(struct hv_driver *drv) | |
799 | { | |
800 | struct vmbus_dynid *dynid, *n; | |
801 | ||
802 | spin_lock(&drv->dynids.lock); | |
803 | list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { | |
804 | list_del(&dynid->node); | |
805 | kfree(dynid); | |
806 | } | |
807 | spin_unlock(&drv->dynids.lock); | |
808 | } | |
809 | ||
fc76936d SH |
810 | /* |
811 | * store_new_id - sysfs frontend to vmbus_add_dynid() | |
812 | * | |
813 | * Allow GUIDs to be added to an existing driver via sysfs. | |
814 | */ | |
815 | static ssize_t new_id_store(struct device_driver *driver, const char *buf, | |
816 | size_t count) | |
817 | { | |
818 | struct hv_driver *drv = drv_to_hv_drv(driver); | |
593db803 | 819 | guid_t guid; |
fc76936d SH |
820 | ssize_t retval; |
821 | ||
593db803 | 822 | retval = guid_parse(buf, &guid); |
31100108 AS |
823 | if (retval) |
824 | return retval; | |
fc76936d | 825 | |
d765edbb | 826 | if (hv_vmbus_dynid_match(drv, &guid)) |
fc76936d SH |
827 | return -EEXIST; |
828 | ||
829 | retval = vmbus_add_dynid(drv, &guid); | |
830 | if (retval) | |
831 | return retval; | |
832 | return count; | |
833 | } | |
834 | static DRIVER_ATTR_WO(new_id); | |
835 | ||
836 | /* | |
837 | * store_remove_id - remove a PCI device ID from this driver | |
838 | * | |
839 | * Removes a dynamic pci device ID to this driver. | |
840 | */ | |
841 | static ssize_t remove_id_store(struct device_driver *driver, const char *buf, | |
842 | size_t count) | |
843 | { | |
844 | struct hv_driver *drv = drv_to_hv_drv(driver); | |
845 | struct vmbus_dynid *dynid, *n; | |
593db803 | 846 | guid_t guid; |
31100108 | 847 | ssize_t retval; |
fc76936d | 848 | |
593db803 | 849 | retval = guid_parse(buf, &guid); |
31100108 AS |
850 | if (retval) |
851 | return retval; | |
fc76936d | 852 | |
31100108 | 853 | retval = -ENODEV; |
fc76936d SH |
854 | spin_lock(&drv->dynids.lock); |
855 | list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { | |
856 | struct hv_vmbus_device_id *id = &dynid->id; | |
857 | ||
593db803 | 858 | if (guid_equal(&id->guid, &guid)) { |
fc76936d SH |
859 | list_del(&dynid->node); |
860 | kfree(dynid); | |
861 | retval = count; | |
862 | break; | |
863 | } | |
864 | } | |
865 | spin_unlock(&drv->dynids.lock); | |
866 | ||
867 | return retval; | |
868 | } | |
869 | static DRIVER_ATTR_WO(remove_id); | |
870 | ||
/*
 * Per-driver sysfs attributes: the write-only "new_id" and "remove_id"
 * files declared with DRIVER_ATTR_WO(). ATTRIBUTE_GROUPS() wraps the array
 * into the vmbus_drv_groups referenced by hv_bus.drv_groups.
 */
static struct attribute *vmbus_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vmbus_drv);
3037a7b6 | 877 | |
b7fc147b S |
878 | |
879 | /* | |
880 | * vmbus_match - Attempt to match the specified device to the specified driver | |
881 | */ | |
882 | static int vmbus_match(struct device *device, struct device_driver *driver) | |
883 | { | |
b7fc147b | 884 | struct hv_driver *drv = drv_to_hv_drv(driver); |
e8e27047 | 885 | struct hv_device *hv_dev = device_to_hv_device(device); |
b7fc147b | 886 | |
8981da32 DC |
887 | /* The hv_sock driver handles all hv_sock offers. */ |
888 | if (is_hvsock_channel(hv_dev->channel)) | |
889 | return drv->hvsock; | |
890 | ||
d765edbb | 891 | if (hv_vmbus_get_id(drv, hv_dev)) |
3037a7b6 | 892 | return 1; |
de632a2b | 893 | |
5841a829 | 894 | return 0; |
b7fc147b S |
895 | } |
896 | ||
f1f0d67b S |
897 | /* |
898 | * vmbus_probe - Add the new vmbus's child device | |
899 | */ | |
900 | static int vmbus_probe(struct device *child_device) | |
901 | { | |
902 | int ret = 0; | |
903 | struct hv_driver *drv = | |
904 | drv_to_hv_drv(child_device->driver); | |
9efd21e1 | 905 | struct hv_device *dev = device_to_hv_device(child_device); |
84946899 | 906 | const struct hv_vmbus_device_id *dev_id; |
f1f0d67b | 907 | |
d765edbb | 908 | dev_id = hv_vmbus_get_id(drv, dev); |
9efd21e1 | 909 | if (drv->probe) { |
84946899 | 910 | ret = drv->probe(dev, dev_id); |
b14a7b30 | 911 | if (ret != 0) |
0a46618d HJ |
912 | pr_err("probe failed for device %s (%d)\n", |
913 | dev_name(child_device), ret); | |
f1f0d67b | 914 | |
f1f0d67b | 915 | } else { |
0a46618d HJ |
916 | pr_err("probe not set for driver %s\n", |
917 | dev_name(child_device)); | |
6de925b1 | 918 | ret = -ENODEV; |
f1f0d67b S |
919 | } |
920 | return ret; | |
921 | } | |
922 | ||
37200078 MK |
923 | /* |
924 | * vmbus_dma_configure -- Configure DMA coherence for VMbus device | |
925 | */ | |
926 | static int vmbus_dma_configure(struct device *child_device) | |
927 | { | |
928 | /* | |
929 | * On ARM64, propagate the DMA coherence setting from the top level | |
930 | * VMbus ACPI device to the child VMbus device being added here. | |
931 | * On x86/x64 coherence is assumed and these calls have no effect. | |
932 | */ | |
933 | hv_setup_dma_ops(child_device, | |
934 | device_get_dma_attr(&hv_acpi_dev->dev) == DEV_DMA_COHERENT); | |
935 | return 0; | |
936 | } | |
937 | ||
c5dce3db S |
938 | /* |
939 | * vmbus_remove - Remove a vmbus device | |
940 | */ | |
fc7a6209 | 941 | static void vmbus_remove(struct device *child_device) |
c5dce3db | 942 | { |
d15a0301 | 943 | struct hv_driver *drv; |
415b023a | 944 | struct hv_device *dev = device_to_hv_device(child_device); |
c5dce3db | 945 | |
d15a0301 S |
946 | if (child_device->driver) { |
947 | drv = drv_to_hv_drv(child_device->driver); | |
948 | if (drv->remove) | |
949 | drv->remove(dev); | |
d15a0301 | 950 | } |
c5dce3db S |
951 | } |
952 | ||
eb1bb259 S |
953 | /* |
954 | * vmbus_shutdown - Shutdown a vmbus device | |
955 | */ | |
956 | static void vmbus_shutdown(struct device *child_device) | |
957 | { | |
958 | struct hv_driver *drv; | |
ca6887fb | 959 | struct hv_device *dev = device_to_hv_device(child_device); |
eb1bb259 S |
960 | |
961 | ||
962 | /* The device may not be attached yet */ | |
963 | if (!child_device->driver) | |
964 | return; | |
965 | ||
966 | drv = drv_to_hv_drv(child_device->driver); | |
967 | ||
ca6887fb S |
968 | if (drv->shutdown) |
969 | drv->shutdown(dev); | |
eb1bb259 S |
970 | } |
971 | ||
83b50f83 | 972 | #ifdef CONFIG_PM_SLEEP |
271b2224 DC |
973 | /* |
974 | * vmbus_suspend - Suspend a vmbus device | |
975 | */ | |
976 | static int vmbus_suspend(struct device *child_device) | |
977 | { | |
978 | struct hv_driver *drv; | |
979 | struct hv_device *dev = device_to_hv_device(child_device); | |
980 | ||
981 | /* The device may not be attached yet */ | |
982 | if (!child_device->driver) | |
983 | return 0; | |
984 | ||
985 | drv = drv_to_hv_drv(child_device->driver); | |
986 | if (!drv->suspend) | |
987 | return -EOPNOTSUPP; | |
988 | ||
989 | return drv->suspend(dev); | |
990 | } | |
991 | ||
992 | /* | |
993 | * vmbus_resume - Resume a vmbus device | |
994 | */ | |
995 | static int vmbus_resume(struct device *child_device) | |
996 | { | |
997 | struct hv_driver *drv; | |
998 | struct hv_device *dev = device_to_hv_device(child_device); | |
999 | ||
1000 | /* The device may not be attached yet */ | |
1001 | if (!child_device->driver) | |
1002 | return 0; | |
1003 | ||
1004 | drv = drv_to_hv_drv(child_device->driver); | |
1005 | if (!drv->resume) | |
1006 | return -EOPNOTSUPP; | |
1007 | ||
1008 | return drv->resume(dev); | |
1009 | } | |
1a06d017 DC |
1010 | #else |
1011 | #define vmbus_suspend NULL | |
1012 | #define vmbus_resume NULL | |
83b50f83 | 1013 | #endif /* CONFIG_PM_SLEEP */ |
086e7a56 S |
1014 | |
1015 | /* | |
1016 | * vmbus_device_release - Final callback release of the vmbus child device | |
1017 | */ | |
1018 | static void vmbus_device_release(struct device *device) | |
1019 | { | |
e8e27047 | 1020 | struct hv_device *hv_dev = device_to_hv_device(device); |
34c6801e | 1021 | struct vmbus_channel *channel = hv_dev->channel; |
086e7a56 | 1022 | |
af9ca6f9 BB |
1023 | hv_debug_rm_dev_dir(hv_dev); |
1024 | ||
54a66265 | 1025 | mutex_lock(&vmbus_connection.channel_mutex); |
800b9329 | 1026 | hv_process_channel_removal(channel); |
54a66265 | 1027 | mutex_unlock(&vmbus_connection.channel_mutex); |
e8e27047 | 1028 | kfree(hv_dev); |
086e7a56 S |
1029 | } |
1030 | ||
/*
 * Power-management callbacks for devices on the vmbus.
 *
 * Note: we must use the "noirq" ops: see the comment before vmbus_bus_pm.
 *
 * suspend_noirq/resume_noirq are set to NULL to support Suspend-to-Idle: we
 * shouldn't suspend the vmbus devices upon Suspend-to-Idle, otherwise there
 * is no way to wake up a Generation-2 VM.
 *
 * The other 4 ops are for hibernation. When CONFIG_PM_SLEEP is not set,
 * vmbus_suspend and vmbus_resume are defined as NULL, so those 4 ops are
 * NULL as well.
 */

static const struct dev_pm_ops vmbus_pm = {
	.suspend_noirq	= NULL,
	.resume_noirq	= NULL,
	.freeze_noirq	= vmbus_suspend,
	.thaw_noirq	= vmbus_resume,
	.poweroff_noirq	= vmbus_suspend,
	.restore_noirq	= vmbus_resume,
};
1049 | ||
/*
 * The one and only one vmbus bus type. It wires the match/probe/remove/
 * shutdown/uevent/dma_configure callbacks, the sysfs attribute groups and
 * the power-management ops into the driver core.
 */
static struct bus_type hv_bus = {
	.name =		"vmbus",
	.match =		vmbus_match,
	.shutdown =		vmbus_shutdown,
	.remove =		vmbus_remove,
	.probe =		vmbus_probe,
	.uevent =		vmbus_uevent,
	.dma_configure =	vmbus_dma_configure,
	.dev_groups =		vmbus_dev_groups,
	.drv_groups =		vmbus_drv_groups,
	.bus_groups =		vmbus_bus_groups,
	.pm =			&vmbus_pm,
};
1064 | ||
bf6506f6 TT |
/*
 * Work item carrying a private copy of a host message to
 * vmbus_onmessage_work(). The payload is a flexible array, so the context
 * is allocated with room for the actual payload size.
 */
struct onmessage_work_context {
	struct work_struct work;
	struct {
		struct hv_message_header header;
		u8 payload[];	/* holds a vmbus_channel_message_header + body */
	} msg;
};
1072 | ||
1073 | static void vmbus_onmessage_work(struct work_struct *work) | |
1074 | { | |
1075 | struct onmessage_work_context *ctx; | |
1076 | ||
09a19628 VK |
1077 | /* Do not process messages if we're in DISCONNECTED state */ |
1078 | if (vmbus_connection.conn_state == DISCONNECTED) | |
1079 | return; | |
1080 | ||
bf6506f6 TT |
1081 | ctx = container_of(work, struct onmessage_work_context, |
1082 | work); | |
5cc41500 VK |
1083 | vmbus_onmessage((struct vmbus_channel_message_header *) |
1084 | &ctx->msg.payload); | |
bf6506f6 TT |
1085 | kfree(ctx); |
1086 | } | |
1087 | ||
/*
 * vmbus_on_msg_dpc - Process the message pending in the VMBUS_MESSAGE_SINT slot
 * @data: pointer to this CPU's struct hv_per_cpu_context, cast to
 *        unsigned long
 *
 * Copies the message out of the synic message page, validates its type and
 * payload size against channel_message_table[], and either runs the handler
 * directly or, for VMHT_BLOCKING handlers, defers it to a work item. Except
 * for the "no message" and allocation-failure returns, the function ends by
 * calling vmbus_signal_eom() for the message.
 */
void vmbus_on_msg_dpc(unsigned long data)
{
	struct hv_per_cpu_context *hv_cpu = (void *)data;
	void *page_addr = hv_cpu->synic_message_page;
	struct hv_message msg_copy, *msg = (struct hv_message *)page_addr +
				  VMBUS_MESSAGE_SINT;
	struct vmbus_channel_message_header *hdr;
	enum vmbus_channel_message_type msgtype;
	const struct vmbus_channel_message_table_entry *entry;
	struct onmessage_work_context *ctx;
	__u8 payload_size;
	u32 message_type;

	/*
	 * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as
	 * it is being used in 'struct vmbus_channel_message_header' definition
	 * which is supposed to match hypervisor ABI.
	 */
	BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32));

	/*
	 * Since the message is in memory shared with the host, an erroneous or
	 * malicious Hyper-V could modify the message while vmbus_on_msg_dpc()
	 * or individual message handlers are executing; to prevent this, copy
	 * the message into private memory.
	 */
	memcpy(&msg_copy, msg, sizeof(struct hv_message));

	message_type = msg_copy.header.message_type;
	if (message_type == HVMSG_NONE)
		/* no msg */
		return;

	/* From here on only the private copy is parsed, never the shared page. */
	hdr = (struct vmbus_channel_message_header *)msg_copy.u.payload;
	msgtype = hdr->msgtype;

	trace_vmbus_on_msg_dpc(hdr);

	/* msgtype indexes channel_message_table[] below, so bound it first. */
	if (msgtype >= CHANNELMSG_COUNT) {
		WARN_ONCE(1, "unknown msgtype=%d\n", msgtype);
		goto msg_handled;
	}

	payload_size = msg_copy.header.payload_size;
	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) {
		WARN_ONCE(1, "payload size is too large (%d)\n", payload_size);
		goto msg_handled;
	}

	entry = &channel_message_table[msgtype];

	/* Message types without a handler are acknowledged and dropped. */
	if (!entry->message_handler)
		goto msg_handled;

	if (payload_size < entry->min_payload_len) {
		WARN_ONCE(1, "message too short: msgtype=%d len=%d\n", msgtype, payload_size);
		goto msg_handled;
	}

	if (entry->handler_type == VMHT_BLOCKING) {
		ctx = kmalloc(struct_size(ctx, msg.payload, payload_size), GFP_ATOMIC);
		/*
		 * NOTE(review): this returns without vmbus_signal_eom(), so
		 * the message is not acknowledged; presumably that is so it
		 * can be picked up again later -- confirm.
		 */
		if (ctx == NULL)
			return;

		INIT_WORK(&ctx->work, vmbus_onmessage_work);
		/* Copy the header plus only the payload bytes actually used. */
		memcpy(&ctx->msg, &msg_copy, sizeof(msg->header) + payload_size);

		/*
		 * The host can generate a rescind message while we
		 * may still be handling the original offer. We deal with
		 * this condition by relying on the synchronization provided
		 * by offer_in_progress and by channel_mutex.  See also the
		 * inline comments in vmbus_onoffer_rescind().
		 */
		switch (msgtype) {
		case CHANNELMSG_RESCIND_CHANNELOFFER:
			/*
			 * If we are handling the rescind message;
			 * schedule the work on the global work queue.
			 *
			 * The OFFER message and the RESCIND message should
			 * not be handled by the same serialized work queue,
			 * because the OFFER handler may call vmbus_open(),
			 * which tries to open the channel by sending an
			 * OPEN_CHANNEL message to the host and waits for
			 * the host's response; however, if the host has
			 * rescinded the channel before it receives the
			 * OPEN_CHANNEL message, the host just silently
			 * ignores the OPEN_CHANNEL message; as a result,
			 * the guest's OFFER handler hangs for ever, if we
			 * handle the RESCIND message in the same serialized
			 * work queue: the RESCIND handler can not start to
			 * run before the OFFER handler finishes.
			 */
			schedule_work(&ctx->work);
			break;

		case CHANNELMSG_OFFERCHANNEL:
			/*
			 * The host sends the offer message of a given channel
			 * before sending the rescind message of the same
			 * channel.  These messages are sent to the guest's
			 * connect CPU; the guest then starts processing them
			 * in the tasklet handler on this CPU:
			 *
			 * VMBUS_CONNECT_CPU
			 *
			 * [vmbus_on_msg_dpc()]
			 * atomic_inc()  // CHANNELMSG_OFFERCHANNEL
			 * queue_work()
			 * ...
			 * [vmbus_on_msg_dpc()]
			 * schedule_work()  // CHANNELMSG_RESCIND_CHANNELOFFER
			 *
			 * We rely on the memory-ordering properties of the
			 * queue_work() and schedule_work() primitives, which
			 * guarantee that the atomic increment will be visible
			 * to the CPUs which will execute the offer & rescind
			 * works by the time these works will start execution.
			 */
			atomic_inc(&vmbus_connection.offer_in_progress);
			fallthrough;

		default:
			queue_work(vmbus_connection.work_queue, &ctx->work);
		}
	} else
		/* Non-blocking handlers run right here, on the private copy. */
		entry->message_handler(hdr);

msg_handled:
	/* Acknowledge the message using the slot in the shared page. */
	vmbus_signal_eom(msg, message_type);
}
1220 | ||
83b50f83 | 1221 | #ifdef CONFIG_PM_SLEEP |
1f48dcf1 DC |
1222 | /* |
1223 | * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for | |
1224 | * hibernation, because hv_sock connections can not persist across hibernation. | |
1225 | */ | |
1226 | static void vmbus_force_channel_rescinded(struct vmbus_channel *channel) | |
1227 | { | |
1228 | struct onmessage_work_context *ctx; | |
1229 | struct vmbus_channel_rescind_offer *rescind; | |
1230 | ||
1231 | WARN_ON(!is_hvsock_channel(channel)); | |
1232 | ||
1233 | /* | |
a276463b | 1234 | * Allocation size is small and the allocation should really not fail, |
1f48dcf1 DC |
1235 | * otherwise the state of the hv_sock connections ends up in limbo. |
1236 | */ | |
a276463b VK |
1237 | ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind), |
1238 | GFP_KERNEL | __GFP_NOFAIL); | |
1f48dcf1 DC |
1239 | |
1240 | /* | |
1241 | * So far, these are not really used by Linux. Just set them to the | |
1242 | * reasonable values conforming to the definitions of the fields. | |
1243 | */ | |
1244 | ctx->msg.header.message_type = 1; | |
1245 | ctx->msg.header.payload_size = sizeof(*rescind); | |
1246 | ||
1247 | /* These values are actually used by Linux. */ | |
a276463b | 1248 | rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload; |
1f48dcf1 DC |
1249 | rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER; |
1250 | rescind->child_relid = channel->offermsg.child_relid; | |
1251 | ||
1252 | INIT_WORK(&ctx->work, vmbus_onmessage_work); | |
1253 | ||
b9fa1b87 | 1254 | queue_work(vmbus_connection.work_queue, &ctx->work); |
1f48dcf1 | 1255 | } |
83b50f83 | 1256 | #endif /* CONFIG_PM_SLEEP */ |
631e63a9 SH |
1257 | |
/*
 * Schedule all channels with events pending
 *
 * Walks the set bits of the receive-interrupt bitmap; each bit index is a
 * channel relid. For every bit that is still set when atomically cleared,
 * the channel is looked up and its callback is either run directly or
 * deferred to the channel's tasklet, depending on channel->callback_mode.
 */
static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
{
	unsigned long *recv_int_page;
	u32 maxbits, relid;

	if (vmbus_proto_version < VERSION_WIN8) {
		/* Pre-win8 hosts: use the connection-wide interrupt page. */
		maxbits = MAX_NUM_CHANNELS_SUPPORTED;
		recv_int_page = vmbus_connection.recv_int_page;
	} else {
		/*
		 * When the host is win8 and beyond, the event page
		 * can be directly checked to get the id of the channel
		 * that has the interrupt pending.
		 */
		void *page_addr = hv_cpu->synic_event_page;
		union hv_synic_event_flags *event
			= (union hv_synic_event_flags *)page_addr +
						 VMBUS_MESSAGE_SINT;

		maxbits = HV_EVENT_FLAGS_COUNT;
		recv_int_page = event->flags;
	}

	if (unlikely(!recv_int_page))
		return;

	for_each_set_bit(relid, recv_int_page, maxbits) {
		void (*callback_fn)(void *context);
		struct vmbus_channel *channel;

		/* Skip bits that are no longer set by the time we clear them. */
		if (!sync_test_and_clear_bit(relid, recv_int_page))
			continue;

		/* Special case - vmbus channel protocol msg */
		if (relid == 0)
			continue;

		/*
		 * Pairs with the kfree_rcu() in vmbus_chan_release().
		 * Guarantees that the channel data structure doesn't
		 * get freed while the channel pointer below is being
		 * dereferenced.
		 */
		rcu_read_lock();

		/* Find channel based on relid */
		channel = relid2channel(relid);
		if (channel == NULL)
			goto sched_unlock_rcu;

		/* Rescinded channels get no further callbacks. */
		if (channel->rescind)
			goto sched_unlock_rcu;

		/*
		 * Make sure that the ring buffer data structure doesn't get
		 * freed while we dereference the ring buffer pointer.  Test
		 * for the channel's onchannel_callback being NULL within a
		 * sched_lock critical section.  See also the inline comments
		 * in vmbus_reset_channel_cb().
		 */
		spin_lock(&channel->sched_lock);

		callback_fn = channel->onchannel_callback;
		if (unlikely(callback_fn == NULL))
			goto sched_unlock;

		trace_vmbus_chan_sched(channel);

		++channel->interrupts;

		switch (channel->callback_mode) {
		case HV_CALL_ISR:
			/* Run the callback right here, under sched_lock. */
			(*callback_fn)(channel->channel_callback_context);
			break;

		case HV_CALL_BATCHED:
			hv_begin_read(&channel->inbound);
			fallthrough;
		case HV_CALL_DIRECT:
			/* Defer the callback to the channel's tasklet. */
			tasklet_schedule(&channel->callback_event);
		}

sched_unlock:
		spin_unlock(&channel->sched_lock);
sched_unlock_rcu:
		rcu_read_unlock();
	}
}
1349 | ||
76d388cd | 1350 | static void vmbus_isr(void) |
36199a99 | 1351 | { |
37cdd991 SH |
1352 | struct hv_per_cpu_context *hv_cpu |
1353 | = this_cpu_ptr(hv_context.cpu_context); | |
1354 | void *page_addr = hv_cpu->synic_event_page; | |
36199a99 GKH |
1355 | struct hv_message *msg; |
1356 | union hv_synic_event_flags *event; | |
ae4636e6 | 1357 | bool handled = false; |
36199a99 | 1358 | |
37cdd991 | 1359 | if (unlikely(page_addr == NULL)) |
76d388cd | 1360 | return; |
5ab05951 S |
1361 | |
1362 | event = (union hv_synic_event_flags *)page_addr + | |
1363 | VMBUS_MESSAGE_SINT; | |
7341d908 S |
1364 | /* |
1365 | * Check for events before checking for messages. This is the order | |
1366 | * in which events and messages are checked in Windows guests on | |
1367 | * Hyper-V, and the Windows team suggested we do the same. | |
1368 | */ | |
36199a99 | 1369 | |
6552ecd7 S |
1370 | if ((vmbus_proto_version == VERSION_WS2008) || |
1371 | (vmbus_proto_version == VERSION_WIN7)) { | |
36199a99 | 1372 | |
6552ecd7 | 1373 | /* Since we are a child, we only need to check bit 0 */ |
5c1bec61 | 1374 | if (sync_test_and_clear_bit(0, event->flags)) |
6552ecd7 | 1375 | handled = true; |
6552ecd7 S |
1376 | } else { |
1377 | /* | |
1378 | * Our host is win8 or above. The signaling mechanism | |
1379 | * has changed and we can directly look at the event page. | |
1380 | * If bit n is set then we have an interrup on the channel | |
1381 | * whose id is n. | |
1382 | */ | |
ae4636e6 | 1383 | handled = true; |
ae4636e6 | 1384 | } |
793be9c7 | 1385 | |
6552ecd7 | 1386 | if (handled) |
631e63a9 | 1387 | vmbus_chan_sched(hv_cpu); |
6552ecd7 | 1388 | |
37cdd991 | 1389 | page_addr = hv_cpu->synic_message_page; |
7341d908 S |
1390 | msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; |
1391 | ||
1392 | /* Check if there are actual msgs to be processed */ | |
4061ed9e | 1393 | if (msg->header.message_type != HVMSG_NONE) { |
fd1fea68 MK |
1394 | if (msg->header.message_type == HVMSG_TIMER_EXPIRED) { |
1395 | hv_stimer0_isr(); | |
1396 | vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED); | |
1397 | } else | |
37cdd991 | 1398 | tasklet_schedule(&hv_cpu->msg_dpc); |
4061ed9e | 1399 | } |
4b44f2d1 | 1400 | |
703f7066 | 1401 | add_interrupt_randomness(vmbus_interrupt); |
d608715d MK |
1402 | } |
1403 | ||
/*
 * vmbus_percpu_isr - per-cpu IRQ wrapper around vmbus_isr()
 *
 * This is the handler passed to request_percpu_irq() in vmbus_bus_init().
 * Neither @irq nor @dev_id is used; the handler always reports IRQ_HANDLED.
 */
static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
{
	vmbus_isr();
	return IRQ_HANDLED;
}
1409 | ||
81b18bce SM |
1410 | /* |
1411 | * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg | |
1412 | * buffer and call into Hyper-V to transfer the data. | |
1413 | */ | |
1414 | static void hv_kmsg_dump(struct kmsg_dumper *dumper, | |
1415 | enum kmsg_dump_reason reason) | |
1416 | { | |
f9f3f02d | 1417 | struct kmsg_dump_iter iter; |
81b18bce | 1418 | size_t bytes_written; |
81b18bce SM |
1419 | |
1420 | /* We are only interested in panics. */ | |
1421 | if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg)) | |
1422 | return; | |
1423 | ||
81b18bce SM |
1424 | /* |
1425 | * Write dump contents to the page. No need to synchronize; panic should | |
1426 | * be single-threaded. | |
1427 | */ | |
f9f3f02d JO |
1428 | kmsg_dump_rewind(&iter); |
1429 | kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE, | |
ddcaf3ca | 1430 | &bytes_written); |
b548a774 MK |
1431 | if (!bytes_written) |
1432 | return; | |
1433 | /* | |
1434 | * P3 to contain the physical address of the panic page & P4 to | |
1435 | * contain the size of the panic data in that page. Rest of the | |
1436 | * registers are no-op when the NOTIFY_MSG flag is set. | |
1437 | */ | |
1438 | hv_set_register(HV_REGISTER_CRASH_P0, 0); | |
1439 | hv_set_register(HV_REGISTER_CRASH_P1, 0); | |
1440 | hv_set_register(HV_REGISTER_CRASH_P2, 0); | |
1441 | hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page)); | |
1442 | hv_set_register(HV_REGISTER_CRASH_P4, bytes_written); | |
1443 | ||
1444 | /* | |
1445 | * Let Hyper-V know there is crash data available along with | |
1446 | * the panic message. | |
1447 | */ | |
1448 | hv_set_register(HV_REGISTER_CRASH_CTL, | |
1449 | (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG)); | |
81b18bce SM |
1450 | } |
1451 | ||
/* kmsg dumper that routes dump requests to hv_kmsg_dump(). */
static struct kmsg_dumper hv_kmsg_dumper = {
	.dump = hv_kmsg_dump,
};
1455 | ||
b0c03eff MC |
1456 | static void hv_kmsg_dump_register(void) |
1457 | { | |
1458 | int ret; | |
1459 | ||
1460 | hv_panic_page = hv_alloc_hyperv_zeroed_page(); | |
1461 | if (!hv_panic_page) { | |
1462 | pr_err("Hyper-V: panic message page memory allocation failed\n"); | |
1463 | return; | |
1464 | } | |
1465 | ||
1466 | ret = kmsg_dump_register(&hv_kmsg_dumper); | |
1467 | if (ret) { | |
1468 | pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret); | |
1469 | hv_free_hyperv_page((unsigned long)hv_panic_page); | |
1470 | hv_panic_page = NULL; | |
1471 | } | |
1472 | } | |
1473 | ||
/* Handle returned by register_sysctl_table(); NULL when not registered. */
static struct ctl_table_header *hv_ctl_table_hdr;

/*
 * sysctl option to allow the user to control whether kmsg data should be
 * reported to Hyper-V on panic.
 *
 * The entry exposes sysctl_record_panic_msg as an int limited to the range
 * SYSCTL_ZERO..SYSCTL_ONE.
 */
static struct ctl_table hv_ctl_table[] = {
	{
		.procname       = "hyperv_record_panic_msg",
		.data           = &sysctl_record_panic_msg,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE
	},
	{}
};
1492 | ||
/* Root table that places hv_ctl_table under the "kernel" sysctl directory. */
static struct ctl_table hv_root_table[] = {
	{
		.procname	= "kernel",
		.mode		= 0555,
		.child		= hv_ctl_table
	},
	{}
};
1500 | }; | |
e513229b | 1501 | |
3e189519 | 1502 | /* |
90c9960e GKH |
1503 | * vmbus_bus_init -Main vmbus driver initialization routine. |
1504 | * | |
1505 | * Here, we | |
0686e4f4 | 1506 | * - initialize the vmbus driver context |
0686e4f4 | 1507 | * - invoke the vmbus hv main init routine |
0686e4f4 | 1508 | * - retrieve the channel offers |
90c9960e | 1509 | */ |
efc26722 | 1510 | static int vmbus_bus_init(void) |
3e7ee490 | 1511 | { |
90c9960e | 1512 | int ret; |
3e7ee490 | 1513 | |
6d26e38f | 1514 | ret = hv_init(); |
90c9960e | 1515 | if (ret != 0) { |
0a46618d | 1516 | pr_err("Unable to initialize the hypervisor - 0x%x\n", ret); |
d6c1c5de | 1517 | return ret; |
3e7ee490 HJ |
1518 | } |
1519 | ||
9adcac5c | 1520 | ret = bus_register(&hv_bus); |
d6c1c5de | 1521 | if (ret) |
d6f3609d | 1522 | return ret; |
3e7ee490 | 1523 | |
d608715d MK |
1524 | /* |
1525 | * VMbus interrupts are best modeled as per-cpu interrupts. If | |
1526 | * on an architecture with support for per-cpu IRQs (e.g. ARM64), | |
1527 | * allocate a per-cpu IRQ using standard Linux kernel functionality. | |
1528 | * If not on such an architecture (e.g., x86/x64), then rely on | |
1529 | * code in the arch-specific portion of the code tree to connect | |
1530 | * the VMbus interrupt handler. | |
1531 | */ | |
1532 | ||
1533 | if (vmbus_irq == -1) { | |
1534 | hv_setup_vmbus_handler(vmbus_isr); | |
1535 | } else { | |
1536 | vmbus_evt = alloc_percpu(long); | |
1537 | ret = request_percpu_irq(vmbus_irq, vmbus_percpu_isr, | |
1538 | "Hyper-V VMbus", vmbus_evt); | |
1539 | if (ret) { | |
1540 | pr_err("Can't request Hyper-V VMbus IRQ %d, Err %d", | |
1541 | vmbus_irq, ret); | |
1542 | free_percpu(vmbus_evt); | |
1543 | goto err_setup; | |
1544 | } | |
1545 | } | |
3e7ee490 | 1546 | |
2608fb65 JW |
1547 | ret = hv_synic_alloc(); |
1548 | if (ret) | |
1549 | goto err_alloc; | |
fd1fea68 | 1550 | |
800b6902 | 1551 | /* |
fd1fea68 MK |
1552 | * Initialize the per-cpu interrupt state and stimer state. |
1553 | * Then connect to the host. | |
800b6902 | 1554 | */ |
4a5f3cde | 1555 | ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online", |
76d36ab7 VK |
1556 | hv_synic_init, hv_synic_cleanup); |
1557 | if (ret < 0) | |
fd1fea68 | 1558 | goto err_cpuhp; |
76d36ab7 VK |
1559 | hyperv_cpuhp_online = ret; |
1560 | ||
800b6902 | 1561 | ret = vmbus_connect(); |
8b9987e9 | 1562 | if (ret) |
17efbee8 | 1563 | goto err_connect; |
800b6902 | 1564 | |
9f8b577f APM |
1565 | if (hv_is_isolation_supported()) |
1566 | sysctl_record_panic_msg = 0; | |
1567 | ||
96c1d058 NM |
1568 | /* |
1569 | * Only register if the crash MSRs are available | |
1570 | */ | |
cc2dd402 | 1571 | if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { |
81b18bce SM |
1572 | u64 hyperv_crash_ctl; |
1573 | /* | |
9f8b577f APM |
1574 | * Panic message recording (sysctl_record_panic_msg) |
1575 | * is enabled by default in non-isolated guests and | |
1576 | * disabled by default in isolated guests; the panic | |
1577 | * message recording won't be available in isolated | |
1578 | * guests should the following registration fail. | |
81b18bce SM |
1579 | */ |
1580 | hv_ctl_table_hdr = register_sysctl_table(hv_root_table); | |
1581 | if (!hv_ctl_table_hdr) | |
1582 | pr_err("Hyper-V: sysctl table register error"); | |
1583 | ||
1584 | /* | |
1585 | * Register for panic kmsg callback only if the right | |
1586 | * capability is supported by the hypervisor. | |
1587 | */ | |
f3c5e63c | 1588 | hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL); |
b0c03eff MC |
1589 | if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) |
1590 | hv_kmsg_dump_register(); | |
81b18bce | 1591 | |
510f7aef | 1592 | register_die_notifier(&hyperv_die_block); |
96c1d058 NM |
1593 | } |
1594 | ||
74347a99 TL |
1595 | /* |
1596 | * Always register the panic notifier because we need to unload | |
1597 | * the VMbus channel connection to prevent any VMbus | |
1598 | * activity after the VM panics. | |
1599 | */ | |
1600 | atomic_notifier_chain_register(&panic_notifier_list, | |
1601 | &hyperv_panic_block); | |
1602 | ||
2d6e882b | 1603 | vmbus_request_offers(); |
8b5d6d3b | 1604 | |
d6c1c5de | 1605 | return 0; |
8b9987e9 | 1606 | |
17efbee8 | 1607 | err_connect: |
76d36ab7 | 1608 | cpuhp_remove_state(hyperv_cpuhp_online); |
fd1fea68 | 1609 | err_cpuhp: |
2608fb65 | 1610 | hv_synic_free(); |
4df4cb9e | 1611 | err_alloc: |
d608715d MK |
1612 | if (vmbus_irq == -1) { |
1613 | hv_remove_vmbus_handler(); | |
1614 | } else { | |
1615 | free_percpu_irq(vmbus_irq, vmbus_evt); | |
1616 | free_percpu(vmbus_evt); | |
1617 | } | |
626b901f | 1618 | err_setup: |
8b9987e9 | 1619 | bus_unregister(&hv_bus); |
8afc06dd SM |
1620 | unregister_sysctl_table(hv_ctl_table_hdr); |
1621 | hv_ctl_table_hdr = NULL; | |
8b9987e9 | 1622 | return ret; |
3e7ee490 HJ |
1623 | } |
1624 | ||
90c9960e | 1625 | /** |
35464483 JO |
1626 | * __vmbus_child_driver_register() - Register a vmbus's driver |
1627 | * @hv_driver: Pointer to driver structure you want to register | |
768fa219 GKH |
1628 | * @owner: owner module of the drv |
1629 | * @mod_name: module name string | |
3e189519 HJ |
1630 | * |
1631 | * Registers the given driver with Linux through the 'driver_register()' call | |
768fa219 | 1632 | * and sets up the hyper-v vmbus handling for this driver. |
3e189519 HJ |
1633 | * It will return the state of the 'driver_register()' call. |
1634 | * | |
90c9960e | 1635 | */ |
768fa219 | 1636 | int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name) |
3e7ee490 | 1637 | { |
5d48a1c2 | 1638 | int ret; |
3e7ee490 | 1639 | |
768fa219 | 1640 | pr_info("registering driver %s\n", hv_driver->name); |
3e7ee490 | 1641 | |
cf6a2eac S |
1642 | ret = vmbus_exists(); |
1643 | if (ret < 0) | |
1644 | return ret; | |
1645 | ||
768fa219 GKH |
1646 | hv_driver->driver.name = hv_driver->name; |
1647 | hv_driver->driver.owner = owner; | |
1648 | hv_driver->driver.mod_name = mod_name; | |
1649 | hv_driver->driver.bus = &hv_bus; | |
3e7ee490 | 1650 | |
fc76936d SH |
1651 | spin_lock_init(&hv_driver->dynids.lock); |
1652 | INIT_LIST_HEAD(&hv_driver->dynids.list); | |
1653 | ||
768fa219 | 1654 | ret = driver_register(&hv_driver->driver); |
3e7ee490 | 1655 | |
5d48a1c2 | 1656 | return ret; |
3e7ee490 | 1657 | } |
768fa219 | 1658 | EXPORT_SYMBOL_GPL(__vmbus_driver_register); |
3e7ee490 | 1659 | |
90c9960e | 1660 | /** |
768fa219 | 1661 | * vmbus_driver_unregister() - Unregister a vmbus's driver |
35464483 JO |
1662 | * @hv_driver: Pointer to driver structure you want to |
1663 | * un-register | |
3e189519 | 1664 | * |
768fa219 GKH |
1665 | * Un-register the given driver that was previous registered with a call to |
1666 | * vmbus_driver_register() | |
90c9960e | 1667 | */ |
768fa219 | 1668 | void vmbus_driver_unregister(struct hv_driver *hv_driver) |
3e7ee490 | 1669 | { |
768fa219 | 1670 | pr_info("unregistering driver %s\n", hv_driver->name); |
3e7ee490 | 1671 | |
fc76936d | 1672 | if (!vmbus_exists()) { |
8f257a14 | 1673 | driver_unregister(&hv_driver->driver); |
fc76936d SH |
1674 | vmbus_free_dynids(hv_driver); |
1675 | } | |
3e7ee490 | 1676 | } |
768fa219 | 1677 | EXPORT_SYMBOL_GPL(vmbus_driver_unregister); |
3e7ee490 | 1678 | |
c2e5df61 SH |
1679 | |
1680 | /* | |
1681 | * Called when last reference to channel is gone. | |
1682 | */ | |
1683 | static void vmbus_chan_release(struct kobject *kobj) | |
1684 | { | |
1685 | struct vmbus_channel *channel | |
1686 | = container_of(kobj, struct vmbus_channel, kobj); | |
1687 | ||
1688 | kfree_rcu(channel, rcu); | |
1689 | } | |
1690 | ||
1691 | struct vmbus_chan_attribute { | |
1692 | struct attribute attr; | |
14948e39 | 1693 | ssize_t (*show)(struct vmbus_channel *chan, char *buf); |
c2e5df61 SH |
1694 | ssize_t (*store)(struct vmbus_channel *chan, |
1695 | const char *buf, size_t count); | |
1696 | }; | |
1697 | #define VMBUS_CHAN_ATTR(_name, _mode, _show, _store) \ | |
1698 | struct vmbus_chan_attribute chan_attr_##_name \ | |
1699 | = __ATTR(_name, _mode, _show, _store) | |
1700 | #define VMBUS_CHAN_ATTR_RW(_name) \ | |
1701 | struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RW(_name) | |
1702 | #define VMBUS_CHAN_ATTR_RO(_name) \ | |
1703 | struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RO(_name) | |
1704 | #define VMBUS_CHAN_ATTR_WO(_name) \ | |
1705 | struct vmbus_chan_attribute chan_attr_##_name = __ATTR_WO(_name) | |
1706 | ||
1707 | static ssize_t vmbus_chan_attr_show(struct kobject *kobj, | |
1708 | struct attribute *attr, char *buf) | |
1709 | { | |
1710 | const struct vmbus_chan_attribute *attribute | |
1711 | = container_of(attr, struct vmbus_chan_attribute, attr); | |
14948e39 | 1712 | struct vmbus_channel *chan |
c2e5df61 SH |
1713 | = container_of(kobj, struct vmbus_channel, kobj); |
1714 | ||
1715 | if (!attribute->show) | |
1716 | return -EIO; | |
1717 | ||
1718 | return attribute->show(chan, buf); | |
1719 | } | |
1720 | ||
75278105 APM |
1721 | static ssize_t vmbus_chan_attr_store(struct kobject *kobj, |
1722 | struct attribute *attr, const char *buf, | |
1723 | size_t count) | |
1724 | { | |
1725 | const struct vmbus_chan_attribute *attribute | |
1726 | = container_of(attr, struct vmbus_chan_attribute, attr); | |
1727 | struct vmbus_channel *chan | |
1728 | = container_of(kobj, struct vmbus_channel, kobj); | |
1729 | ||
1730 | if (!attribute->store) | |
1731 | return -EIO; | |
1732 | ||
1733 | return attribute->store(chan, buf, count); | |
1734 | } | |
1735 | ||
c2e5df61 SH |
1736 | static const struct sysfs_ops vmbus_chan_sysfs_ops = { |
1737 | .show = vmbus_chan_attr_show, | |
75278105 | 1738 | .store = vmbus_chan_attr_store, |
c2e5df61 SH |
1739 | }; |
1740 | ||
14948e39 | 1741 | static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 | 1742 | { |
14948e39 KB |
1743 | struct hv_ring_buffer_info *rbi = &channel->outbound; |
1744 | ssize_t ret; | |
c2e5df61 | 1745 | |
14948e39 KB |
1746 | mutex_lock(&rbi->ring_buffer_mutex); |
1747 | if (!rbi->ring_buffer) { | |
1748 | mutex_unlock(&rbi->ring_buffer_mutex); | |
fcedbb29 | 1749 | return -EINVAL; |
14948e39 | 1750 | } |
fcedbb29 | 1751 | |
14948e39 KB |
1752 | ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask); |
1753 | mutex_unlock(&rbi->ring_buffer_mutex); | |
1754 | return ret; | |
c2e5df61 | 1755 | } |
875c362b | 1756 | static VMBUS_CHAN_ATTR_RO(out_mask); |
c2e5df61 | 1757 | |
14948e39 | 1758 | static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 | 1759 | { |
14948e39 KB |
1760 | struct hv_ring_buffer_info *rbi = &channel->inbound; |
1761 | ssize_t ret; | |
c2e5df61 | 1762 | |
14948e39 KB |
1763 | mutex_lock(&rbi->ring_buffer_mutex); |
1764 | if (!rbi->ring_buffer) { | |
1765 | mutex_unlock(&rbi->ring_buffer_mutex); | |
fcedbb29 | 1766 | return -EINVAL; |
14948e39 | 1767 | } |
fcedbb29 | 1768 | |
14948e39 KB |
1769 | ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask); |
1770 | mutex_unlock(&rbi->ring_buffer_mutex); | |
1771 | return ret; | |
c2e5df61 | 1772 | } |
875c362b | 1773 | static VMBUS_CHAN_ATTR_RO(in_mask); |
c2e5df61 | 1774 | |
14948e39 | 1775 | static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 | 1776 | { |
14948e39 KB |
1777 | struct hv_ring_buffer_info *rbi = &channel->inbound; |
1778 | ssize_t ret; | |
c2e5df61 | 1779 | |
14948e39 KB |
1780 | mutex_lock(&rbi->ring_buffer_mutex); |
1781 | if (!rbi->ring_buffer) { | |
1782 | mutex_unlock(&rbi->ring_buffer_mutex); | |
fcedbb29 | 1783 | return -EINVAL; |
14948e39 | 1784 | } |
fcedbb29 | 1785 | |
14948e39 KB |
1786 | ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi)); |
1787 | mutex_unlock(&rbi->ring_buffer_mutex); | |
1788 | return ret; | |
c2e5df61 | 1789 | } |
875c362b | 1790 | static VMBUS_CHAN_ATTR_RO(read_avail); |
c2e5df61 | 1791 | |
14948e39 | 1792 | static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 | 1793 | { |
14948e39 KB |
1794 | struct hv_ring_buffer_info *rbi = &channel->outbound; |
1795 | ssize_t ret; | |
c2e5df61 | 1796 | |
14948e39 KB |
1797 | mutex_lock(&rbi->ring_buffer_mutex); |
1798 | if (!rbi->ring_buffer) { | |
1799 | mutex_unlock(&rbi->ring_buffer_mutex); | |
fcedbb29 | 1800 | return -EINVAL; |
14948e39 | 1801 | } |
fcedbb29 | 1802 | |
14948e39 KB |
1803 | ret = sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi)); |
1804 | mutex_unlock(&rbi->ring_buffer_mutex); | |
1805 | return ret; | |
c2e5df61 | 1806 | } |
875c362b | 1807 | static VMBUS_CHAN_ATTR_RO(write_avail); |
c2e5df61 | 1808 | |
75278105 | 1809 | static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 SH |
1810 | { |
1811 | return sprintf(buf, "%u\n", channel->target_cpu); | |
1812 | } | |
75278105 APM |
1813 | static ssize_t target_cpu_store(struct vmbus_channel *channel, |
1814 | const char *buf, size_t count) | |
1815 | { | |
afaa33da | 1816 | u32 target_cpu, origin_cpu; |
75278105 | 1817 | ssize_t ret = count; |
75278105 APM |
1818 | |
1819 | if (vmbus_proto_version < VERSION_WIN10_V4_1) | |
1820 | return -EIO; | |
1821 | ||
1822 | if (sscanf(buf, "%uu", &target_cpu) != 1) | |
1823 | return -EIO; | |
1824 | ||
1825 | /* Validate target_cpu for the cpumask_test_cpu() operation below. */ | |
1826 | if (target_cpu >= nr_cpumask_bits) | |
1827 | return -EINVAL; | |
1828 | ||
1829 | /* No CPUs should come up or down during this. */ | |
1830 | cpus_read_lock(); | |
1831 | ||
0a968209 | 1832 | if (!cpu_online(target_cpu)) { |
75278105 APM |
1833 | cpus_read_unlock(); |
1834 | return -EINVAL; | |
1835 | } | |
1836 | ||
1837 | /* | |
1838 | * Synchronizes target_cpu_store() and channel closure: | |
1839 | * | |
1840 | * { Initially: state = CHANNEL_OPENED } | |
1841 | * | |
1842 | * CPU1 CPU2 | |
1843 | * | |
1844 | * [target_cpu_store()] [vmbus_disconnect_ring()] | |
1845 | * | |
1846 | * LOCK channel_mutex LOCK channel_mutex | |
1847 | * LOAD r1 = state LOAD r2 = state | |
1848 | * IF (r1 == CHANNEL_OPENED) IF (r2 == CHANNEL_OPENED) | |
1849 | * SEND MODIFYCHANNEL STORE state = CHANNEL_OPEN | |
1850 | * [...] SEND CLOSECHANNEL | |
1851 | * UNLOCK channel_mutex UNLOCK channel_mutex | |
1852 | * | |
1853 | * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes | |
1854 | * CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND | |
1855 | * | |
1856 | * Note. The host processes the channel messages "sequentially", in | |
1857 | * the order in which they are received on a per-partition basis. | |
1858 | */ | |
1859 | mutex_lock(&vmbus_connection.channel_mutex); | |
1860 | ||
1861 | /* | |
1862 | * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels; | |
1863 | * avoid sending the message and fail here for such channels. | |
1864 | */ | |
1865 | if (channel->state != CHANNEL_OPENED_STATE) { | |
1866 | ret = -EIO; | |
1867 | goto cpu_store_unlock; | |
1868 | } | |
1869 | ||
afaa33da APM |
1870 | origin_cpu = channel->target_cpu; |
1871 | if (target_cpu == origin_cpu) | |
75278105 APM |
1872 | goto cpu_store_unlock; |
1873 | ||
870ced05 | 1874 | if (vmbus_send_modifychannel(channel, |
75278105 APM |
1875 | hv_cpu_number_to_vp_number(target_cpu))) { |
1876 | ret = -EIO; | |
1877 | goto cpu_store_unlock; | |
1878 | } | |
1879 | ||
1880 | /* | |
870ced05 APM |
1881 | * For version before VERSION_WIN10_V5_3, the following warning holds: |
1882 | * | |
75278105 APM |
1883 | * Warning. At this point, there is *no* guarantee that the host will |
1884 | * have successfully processed the vmbus_send_modifychannel() request. | |
1885 | * See the header comment of vmbus_send_modifychannel() for more info. | |
1886 | * | |
1887 | * Lags in the processing of the above vmbus_send_modifychannel() can | |
1888 | * result in missed interrupts if the "old" target CPU is taken offline | |
1889 | * before Hyper-V starts sending interrupts to the "new" target CPU. | |
1890 | * But apart from this offlining scenario, the code tolerates such | |
1891 | * lags. It will function correctly even if a channel interrupt comes | |
1892 | * in on a CPU that is different from the channel target_cpu value. | |
1893 | */ | |
1894 | ||
1895 | channel->target_cpu = target_cpu; | |
75278105 | 1896 | |
afaa33da APM |
1897 | /* See init_vp_index(). */ |
1898 | if (hv_is_perf_channel(channel)) | |
de96e8a0 | 1899 | hv_update_allocated_cpus(origin_cpu, target_cpu); |
afaa33da APM |
1900 | |
1901 | /* Currently set only for storvsc channels. */ | |
1902 | if (channel->change_target_cpu_callback) { | |
1903 | (*channel->change_target_cpu_callback)(channel, | |
1904 | origin_cpu, target_cpu); | |
1905 | } | |
1906 | ||
75278105 APM |
1907 | cpu_store_unlock: |
1908 | mutex_unlock(&vmbus_connection.channel_mutex); | |
1909 | cpus_read_unlock(); | |
1910 | return ret; | |
1911 | } | |
1912 | static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store); | |
c2e5df61 | 1913 | |
14948e39 | 1914 | static ssize_t channel_pending_show(struct vmbus_channel *channel, |
c2e5df61 SH |
1915 | char *buf) |
1916 | { | |
1917 | return sprintf(buf, "%d\n", | |
1918 | channel_pending(channel, | |
1919 | vmbus_connection.monitor_pages[1])); | |
1920 | } | |
f0434de4 | 1921 | static VMBUS_CHAN_ATTR(pending, 0444, channel_pending_show, NULL); |
c2e5df61 | 1922 | |
14948e39 | 1923 | static ssize_t channel_latency_show(struct vmbus_channel *channel, |
c2e5df61 SH |
1924 | char *buf) |
1925 | { | |
1926 | return sprintf(buf, "%d\n", | |
1927 | channel_latency(channel, | |
1928 | vmbus_connection.monitor_pages[1])); | |
1929 | } | |
f0434de4 | 1930 | static VMBUS_CHAN_ATTR(latency, 0444, channel_latency_show, NULL); |
c2e5df61 | 1931 | |
14948e39 | 1932 | static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf) |
6981fbf3 SH |
1933 | { |
1934 | return sprintf(buf, "%llu\n", channel->interrupts); | |
1935 | } | |
f0434de4 | 1936 | static VMBUS_CHAN_ATTR(interrupts, 0444, channel_interrupts_show, NULL); |
6981fbf3 | 1937 | |
14948e39 | 1938 | static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf) |
6981fbf3 SH |
1939 | { |
1940 | return sprintf(buf, "%llu\n", channel->sig_events); | |
1941 | } | |
f0434de4 | 1942 | static VMBUS_CHAN_ATTR(events, 0444, channel_events_show, NULL); |
6981fbf3 | 1943 | |
14948e39 | 1944 | static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel, |
396ae57e KB |
1945 | char *buf) |
1946 | { | |
1947 | return sprintf(buf, "%llu\n", | |
1948 | (unsigned long long)channel->intr_in_full); | |
1949 | } | |
1950 | static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL); | |
1951 | ||
14948e39 | 1952 | static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel, |
396ae57e KB |
1953 | char *buf) |
1954 | { | |
1955 | return sprintf(buf, "%llu\n", | |
1956 | (unsigned long long)channel->intr_out_empty); | |
1957 | } | |
1958 | static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL); | |
1959 | ||
14948e39 | 1960 | static ssize_t channel_out_full_first_show(struct vmbus_channel *channel, |
396ae57e KB |
1961 | char *buf) |
1962 | { | |
1963 | return sprintf(buf, "%llu\n", | |
1964 | (unsigned long long)channel->out_full_first); | |
1965 | } | |
1966 | static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL); | |
1967 | ||
14948e39 | 1968 | static ssize_t channel_out_full_total_show(struct vmbus_channel *channel, |
396ae57e KB |
1969 | char *buf) |
1970 | { | |
1971 | return sprintf(buf, "%llu\n", | |
1972 | (unsigned long long)channel->out_full_total); | |
1973 | } | |
1974 | static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL); | |
1975 | ||
14948e39 | 1976 | static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel, |
f0fa2974 SH |
1977 | char *buf) |
1978 | { | |
1979 | return sprintf(buf, "%u\n", channel->offermsg.monitorid); | |
1980 | } | |
f0434de4 | 1981 | static VMBUS_CHAN_ATTR(monitor_id, 0444, subchannel_monitor_id_show, NULL); |
f0fa2974 | 1982 | |
14948e39 | 1983 | static ssize_t subchannel_id_show(struct vmbus_channel *channel, |
f0fa2974 SH |
1984 | char *buf) |
1985 | { | |
1986 | return sprintf(buf, "%u\n", | |
1987 | channel->offermsg.offer.sub_channel_index); | |
1988 | } | |
1989 | static VMBUS_CHAN_ATTR_RO(subchannel_id); | |
1990 | ||
c2e5df61 SH |
1991 | static struct attribute *vmbus_chan_attrs[] = { |
1992 | &chan_attr_out_mask.attr, | |
1993 | &chan_attr_in_mask.attr, | |
1994 | &chan_attr_read_avail.attr, | |
1995 | &chan_attr_write_avail.attr, | |
1996 | &chan_attr_cpu.attr, | |
1997 | &chan_attr_pending.attr, | |
1998 | &chan_attr_latency.attr, | |
6981fbf3 SH |
1999 | &chan_attr_interrupts.attr, |
2000 | &chan_attr_events.attr, | |
396ae57e KB |
2001 | &chan_attr_intr_in_full.attr, |
2002 | &chan_attr_intr_out_empty.attr, | |
2003 | &chan_attr_out_full_first.attr, | |
2004 | &chan_attr_out_full_total.attr, | |
f0fa2974 SH |
2005 | &chan_attr_monitor_id.attr, |
2006 | &chan_attr_subchannel_id.attr, | |
c2e5df61 SH |
2007 | NULL |
2008 | }; | |
2009 | ||
46fc1548 KB |
2010 | /* |
2011 | * Channel-level attribute_group callback function. Returns the permission for | |
2012 | * each attribute, and returns 0 if an attribute is not visible. | |
2013 | */ | |
2014 | static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj, | |
2015 | struct attribute *attr, int idx) | |
2016 | { | |
2017 | const struct vmbus_channel *channel = | |
2018 | container_of(kobj, struct vmbus_channel, kobj); | |
2019 | ||
2020 | /* Hide the monitor attributes if the monitor mechanism is not used. */ | |
2021 | if (!channel->offermsg.monitor_allocated && | |
2022 | (attr == &chan_attr_pending.attr || | |
2023 | attr == &chan_attr_latency.attr || | |
2024 | attr == &chan_attr_monitor_id.attr)) | |
2025 | return 0; | |
2026 | ||
2027 | return attr->mode; | |
2028 | } | |
2029 | ||
2030 | static struct attribute_group vmbus_chan_group = { | |
2031 | .attrs = vmbus_chan_attrs, | |
2032 | .is_visible = vmbus_chan_attr_is_visible | |
2033 | }; | |
2034 | ||
c2e5df61 SH |
2035 | static struct kobj_type vmbus_chan_ktype = { |
2036 | .sysfs_ops = &vmbus_chan_sysfs_ops, | |
2037 | .release = vmbus_chan_release, | |
c2e5df61 SH |
2038 | }; |
2039 | ||
2040 | /* | |
2041 | * vmbus_add_channel_kobj - setup a sub-directory under device/channels | |
2042 | */ | |
2043 | int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel) | |
2044 | { | |
46fc1548 | 2045 | const struct device *device = &dev->device; |
c2e5df61 SH |
2046 | struct kobject *kobj = &channel->kobj; |
2047 | u32 relid = channel->offermsg.child_relid; | |
2048 | int ret; | |
2049 | ||
2050 | kobj->kset = dev->channels_kset; | |
2051 | ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL, | |
2052 | "%u", relid); | |
8bc69f86 ML |
2053 | if (ret) { |
2054 | kobject_put(kobj); | |
c2e5df61 | 2055 | return ret; |
8bc69f86 | 2056 | } |
c2e5df61 | 2057 | |
46fc1548 KB |
2058 | ret = sysfs_create_group(kobj, &vmbus_chan_group); |
2059 | ||
2060 | if (ret) { | |
2061 | /* | |
2062 | * The calling functions' error handling paths will cleanup the | |
2063 | * empty channel directory. | |
2064 | */ | |
8bc69f86 | 2065 | kobject_put(kobj); |
46fc1548 KB |
2066 | dev_err(device, "Unable to set up channel sysfs files\n"); |
2067 | return ret; | |
2068 | } | |
2069 | ||
c2e5df61 SH |
2070 | kobject_uevent(kobj, KOBJ_ADD); |
2071 | ||
2072 | return 0; | |
2073 | } | |
2074 | ||
46fc1548 KB |
2075 | /* |
2076 | * vmbus_remove_channel_attr_group - remove the channel's attribute group | |
2077 | */ | |
2078 | void vmbus_remove_channel_attr_group(struct vmbus_channel *channel) | |
2079 | { | |
2080 | sysfs_remove_group(&channel->kobj, &vmbus_chan_group); | |
2081 | } | |
2082 | ||
3e189519 | 2083 | /* |
f2c73011 | 2084 | * vmbus_device_create - Creates and registers a new child device |
3e189519 | 2085 | * on the vmbus. |
90c9960e | 2086 | */ |
593db803 AS |
2087 | struct hv_device *vmbus_device_create(const guid_t *type, |
2088 | const guid_t *instance, | |
1b9d48f2 | 2089 | struct vmbus_channel *channel) |
3e7ee490 | 2090 | { |
3d3b5518 | 2091 | struct hv_device *child_device_obj; |
3e7ee490 | 2092 | |
6bad88da S |
2093 | child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL); |
2094 | if (!child_device_obj) { | |
0a46618d | 2095 | pr_err("Unable to allocate device object for child device\n"); |
3e7ee490 HJ |
2096 | return NULL; |
2097 | } | |
2098 | ||
cae5b843 | 2099 | child_device_obj->channel = channel; |
593db803 AS |
2100 | guid_copy(&child_device_obj->dev_type, type); |
2101 | guid_copy(&child_device_obj->dev_instance, instance); | |
7047f17d | 2102 | child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */ |
3e7ee490 | 2103 | |
3e7ee490 HJ |
2104 | return child_device_obj; |
2105 | } | |
2106 | ||
3e189519 | 2107 | /* |
22794281 | 2108 | * vmbus_device_register - Register the child device |
90c9960e | 2109 | */ |
22794281 | 2110 | int vmbus_device_register(struct hv_device *child_device_obj) |
3e7ee490 | 2111 | { |
c2e5df61 SH |
2112 | struct kobject *kobj = &child_device_obj->device.kobj; |
2113 | int ret; | |
6bad88da | 2114 | |
f6b2db08 | 2115 | dev_set_name(&child_device_obj->device, "%pUl", |
458c4475 | 2116 | &child_device_obj->channel->offermsg.offer.if_instance); |
3e7ee490 | 2117 | |
0bce28b6 | 2118 | child_device_obj->device.bus = &hv_bus; |
607c1a11 | 2119 | child_device_obj->device.parent = &hv_acpi_dev->dev; |
6bad88da | 2120 | child_device_obj->device.release = vmbus_device_release; |
3e7ee490 | 2121 | |
3a546958 APM |
2122 | child_device_obj->device.dma_parms = &child_device_obj->dma_parms; |
2123 | child_device_obj->device.dma_mask = &child_device_obj->dma_mask; | |
2124 | dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); | |
2125 | ||
90c9960e GKH |
2126 | /* |
2127 | * Register with the LDM. This will kick off the driver/device | |
2128 | * binding...which will eventually call vmbus_match() and vmbus_probe() | |
2129 | */ | |
6bad88da | 2130 | ret = device_register(&child_device_obj->device); |
c2e5df61 | 2131 | if (ret) { |
0a46618d | 2132 | pr_err("Unable to register child device\n"); |
c2e5df61 SH |
2133 | return ret; |
2134 | } | |
2135 | ||
2136 | child_device_obj->channels_kset = kset_create_and_add("channels", | |
2137 | NULL, kobj); | |
2138 | if (!child_device_obj->channels_kset) { | |
2139 | ret = -ENOMEM; | |
2140 | goto err_dev_unregister; | |
2141 | } | |
2142 | ||
2143 | ret = vmbus_add_channel_kobj(child_device_obj, | |
2144 | child_device_obj->channel); | |
2145 | if (ret) { | |
2146 | pr_err("Unable to register primary channeln"); | |
2147 | goto err_kset_unregister; | |
2148 | } | |
af9ca6f9 | 2149 | hv_debug_add_dev_dir(child_device_obj); |
c2e5df61 SH |
2150 | |
2151 | return 0; | |
2152 | ||
2153 | err_kset_unregister: | |
2154 | kset_unregister(child_device_obj->channels_kset); | |
3e7ee490 | 2155 | |
c2e5df61 SH |
2156 | err_dev_unregister: |
2157 | device_unregister(&child_device_obj->device); | |
3e7ee490 HJ |
2158 | return ret; |
2159 | } | |
2160 | ||
3e189519 | 2161 | /* |
696453ba | 2162 | * vmbus_device_unregister - Remove the specified child device |
3e189519 | 2163 | * from the vmbus. |
90c9960e | 2164 | */ |
696453ba | 2165 | void vmbus_device_unregister(struct hv_device *device_obj) |
3e7ee490 | 2166 | { |
84672369 FS |
2167 | pr_debug("child device %s unregistered\n", |
2168 | dev_name(&device_obj->device)); | |
2169 | ||
869b5567 DC |
2170 | kset_unregister(device_obj->channels_kset); |
2171 | ||
90c9960e GKH |
2172 | /* |
2173 | * Kick off the process of unregistering the device. | |
2174 | * This will call vmbus_remove() and eventually vmbus_device_release() | |
2175 | */ | |
6bad88da | 2176 | device_unregister(&device_obj->device); |
3e7ee490 HJ |
2177 | } |
2178 | ||
3e7ee490 | 2179 | |
b0069f43 | 2180 | /* |
7f163a6f | 2181 | * VMBUS is an acpi enumerated device. Get the information we |
90f34535 | 2182 | * need from DSDT. |
b0069f43 | 2183 | */ |
7f163a6f | 2184 | #define VTPM_BASE_ADDRESS 0xfed40000 |
90f34535 | 2185 | static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) |
b0069f43 | 2186 | { |
7f163a6f JO |
2187 | resource_size_t start = 0; |
2188 | resource_size_t end = 0; | |
2189 | struct resource *new_res; | |
2190 | struct resource **old_res = &hyperv_mmio; | |
2191 | struct resource **prev_res = NULL; | |
626b901f | 2192 | struct resource r; |
7f163a6f | 2193 | |
90f34535 | 2194 | switch (res->type) { |
7f163a6f JO |
2195 | |
2196 | /* | |
2197 | * "Address" descriptors are for bus windows. Ignore | |
2198 | * "memory" descriptors, which are for registers on | |
2199 | * devices. | |
2200 | */ | |
2201 | case ACPI_RESOURCE_TYPE_ADDRESS32: | |
2202 | start = res->data.address32.address.minimum; | |
2203 | end = res->data.address32.address.maximum; | |
4eb923f8 | 2204 | break; |
b0069f43 | 2205 | |
90f34535 | 2206 | case ACPI_RESOURCE_TYPE_ADDRESS64: |
7f163a6f JO |
2207 | start = res->data.address64.address.minimum; |
2208 | end = res->data.address64.address.maximum; | |
4eb923f8 | 2209 | break; |
7f163a6f | 2210 | |
626b901f MK |
2211 | /* |
2212 | * The IRQ information is needed only on ARM64, which Hyper-V | |
2213 | * sets up in the extended format. IRQ information is present | |
2214 | * on x86/x64 in the non-extended format but it is not used by | |
2215 | * Linux. So don't bother checking for the non-extended format. | |
2216 | */ | |
2217 | case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: | |
2218 | if (!acpi_dev_resource_interrupt(res, 0, &r)) { | |
2219 | pr_err("Unable to parse Hyper-V ACPI interrupt\n"); | |
2220 | return AE_ERROR; | |
2221 | } | |
2222 | /* ARM64 INTID for VMbus */ | |
2223 | vmbus_interrupt = res->data.extended_irq.interrupts[0]; | |
2224 | /* Linux IRQ number */ | |
2225 | vmbus_irq = r.start; | |
2226 | return AE_OK; | |
2227 | ||
7f163a6f JO |
2228 | default: |
2229 | /* Unused resource type */ | |
2230 | return AE_OK; | |
2231 | ||
b0069f43 | 2232 | } |
7f163a6f JO |
2233 | /* |
2234 | * Ignore ranges that are below 1MB, as they're not | |
2235 | * necessary or useful here. | |
2236 | */ | |
2237 | if (end < 0x100000) | |
2238 | return AE_OK; | |
2239 | ||
2240 | new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC); | |
2241 | if (!new_res) | |
2242 | return AE_NO_MEMORY; | |
2243 | ||
2244 | /* If this range overlaps the virtual TPM, truncate it. */ | |
2245 | if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) | |
2246 | end = VTPM_BASE_ADDRESS; | |
2247 | ||
2248 | new_res->name = "hyperv mmio"; | |
2249 | new_res->flags = IORESOURCE_MEM; | |
2250 | new_res->start = start; | |
2251 | new_res->end = end; | |
2252 | ||
40f26f31 | 2253 | /* |
40f26f31 JO |
2254 | * If two ranges are adjacent, merge them. |
2255 | */ | |
7f163a6f JO |
2256 | do { |
2257 | if (!*old_res) { | |
2258 | *old_res = new_res; | |
2259 | break; | |
2260 | } | |
2261 | ||
40f26f31 JO |
2262 | if (((*old_res)->end + 1) == new_res->start) { |
2263 | (*old_res)->end = new_res->end; | |
2264 | kfree(new_res); | |
2265 | break; | |
2266 | } | |
2267 | ||
2268 | if ((*old_res)->start == new_res->end + 1) { | |
2269 | (*old_res)->start = new_res->start; | |
2270 | kfree(new_res); | |
2271 | break; | |
2272 | } | |
2273 | ||
23a06831 | 2274 | if ((*old_res)->start > new_res->end) { |
7f163a6f JO |
2275 | new_res->sibling = *old_res; |
2276 | if (prev_res) | |
2277 | (*prev_res)->sibling = new_res; | |
2278 | *old_res = new_res; | |
2279 | break; | |
2280 | } | |
2281 | ||
2282 | prev_res = old_res; | |
2283 | old_res = &(*old_res)->sibling; | |
2284 | ||
2285 | } while (1); | |
b0069f43 S |
2286 | |
2287 | return AE_OK; | |
2288 | } | |
2289 | ||
7f163a6f JO |
2290 | static int vmbus_acpi_remove(struct acpi_device *device) |
2291 | { | |
2292 | struct resource *cur_res; | |
2293 | struct resource *next_res; | |
2294 | ||
2295 | if (hyperv_mmio) { | |
6d146aef JO |
2296 | if (fb_mmio) { |
2297 | __release_region(hyperv_mmio, fb_mmio->start, | |
2298 | resource_size(fb_mmio)); | |
2299 | fb_mmio = NULL; | |
2300 | } | |
2301 | ||
7f163a6f JO |
2302 | for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { |
2303 | next_res = cur_res->sibling; | |
2304 | kfree(cur_res); | |
2305 | } | |
2306 | } | |
2307 | ||
2308 | return 0; | |
2309 | } | |
2310 | ||
6d146aef JO |
2311 | static void vmbus_reserve_fb(void) |
2312 | { | |
2313 | int size; | |
2314 | /* | |
2315 | * Make a claim for the frame buffer in the resource tree under the | |
2316 | * first node, which will be the one below 4GB. The length seems to | |
2317 | * be underreported, particularly in a Generation 1 VM. So start out | |
2318 | * reserving a larger area and make it smaller until it succeeds. | |
2319 | */ | |
2320 | ||
2321 | if (screen_info.lfb_base) { | |
2322 | if (efi_enabled(EFI_BOOT)) | |
2323 | size = max_t(__u32, screen_info.lfb_size, 0x800000); | |
2324 | else | |
2325 | size = max_t(__u32, screen_info.lfb_size, 0x4000000); | |
2326 | ||
2327 | for (; !fb_mmio && (size >= 0x100000); size >>= 1) { | |
2328 | fb_mmio = __request_region(hyperv_mmio, | |
2329 | screen_info.lfb_base, size, | |
2330 | fb_mmio_name, 0); | |
2331 | } | |
2332 | } | |
2333 | } | |
2334 | ||
35464483 JO |
2335 | /** |
2336 | * vmbus_allocate_mmio() - Pick a memory-mapped I/O range. | |
2337 | * @new: If successful, supplied a pointer to the | |
2338 | * allocated MMIO space. | |
2339 | * @device_obj: Identifies the caller | |
2340 | * @min: Minimum guest physical address of the | |
2341 | * allocation | |
2342 | * @max: Maximum guest physical address | |
2343 | * @size: Size of the range to be allocated | |
2344 | * @align: Alignment of the range to be allocated | |
2345 | * @fb_overlap_ok: Whether this allocation can be allowed | |
2346 | * to overlap the video frame buffer. | |
2347 | * | |
2348 | * This function walks the resources granted to VMBus by the | |
2349 | * _CRS object in the ACPI namespace underneath the parent | |
2350 | * "bridge" whether that's a root PCI bus in the Generation 1 | |
2351 | * case or a Module Device in the Generation 2 case. It then | |
2352 | * attempts to allocate from the global MMIO pool in a way that | |
2353 | * matches the constraints supplied in these parameters and by | |
2354 | * that _CRS. | |
2355 | * | |
2356 | * Return: 0 on success, -errno on failure | |
2357 | */ | |
2358 | int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, | |
2359 | resource_size_t min, resource_size_t max, | |
2360 | resource_size_t size, resource_size_t align, | |
2361 | bool fb_overlap_ok) | |
2362 | { | |
be000f93 | 2363 | struct resource *iter, *shadow; |
ea37a6b8 | 2364 | resource_size_t range_min, range_max, start; |
35464483 | 2365 | const char *dev_n = dev_name(&device_obj->device); |
ea37a6b8 | 2366 | int retval; |
e16dad6b JO |
2367 | |
2368 | retval = -ENXIO; | |
8aea7f82 | 2369 | mutex_lock(&hyperv_mmio_lock); |
35464483 | 2370 | |
ea37a6b8 JO |
2371 | /* |
2372 | * If overlaps with frame buffers are allowed, then first attempt to | |
2373 | * make the allocation from within the reserved region. Because it | |
2374 | * is already reserved, no shadow allocation is necessary. | |
2375 | */ | |
2376 | if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) && | |
2377 | !(max < fb_mmio->start)) { | |
2378 | ||
2379 | range_min = fb_mmio->start; | |
2380 | range_max = fb_mmio->end; | |
2381 | start = (range_min + align - 1) & ~(align - 1); | |
2382 | for (; start + size - 1 <= range_max; start += align) { | |
2383 | *new = request_mem_region_exclusive(start, size, dev_n); | |
2384 | if (*new) { | |
2385 | retval = 0; | |
2386 | goto exit; | |
2387 | } | |
2388 | } | |
2389 | } | |
2390 | ||
35464483 JO |
2391 | for (iter = hyperv_mmio; iter; iter = iter->sibling) { |
2392 | if ((iter->start >= max) || (iter->end <= min)) | |
2393 | continue; | |
2394 | ||
2395 | range_min = iter->start; | |
2396 | range_max = iter->end; | |
ea37a6b8 JO |
2397 | start = (range_min + align - 1) & ~(align - 1); |
2398 | for (; start + size - 1 <= range_max; start += align) { | |
2399 | shadow = __request_region(iter, start, size, NULL, | |
2400 | IORESOURCE_BUSY); | |
2401 | if (!shadow) | |
2402 | continue; | |
2403 | ||
2404 | *new = request_mem_region_exclusive(start, size, dev_n); | |
2405 | if (*new) { | |
2406 | shadow->name = (char *)*new; | |
2407 | retval = 0; | |
2408 | goto exit; | |
35464483 JO |
2409 | } |
2410 | ||
ea37a6b8 | 2411 | __release_region(iter, start, size); |
35464483 JO |
2412 | } |
2413 | } | |
2414 | ||
e16dad6b | 2415 | exit: |
8aea7f82 | 2416 | mutex_unlock(&hyperv_mmio_lock); |
e16dad6b | 2417 | return retval; |
35464483 JO |
2418 | } |
2419 | EXPORT_SYMBOL_GPL(vmbus_allocate_mmio); | |
2420 | ||
97fb77dc JO |
2421 | /** |
2422 | * vmbus_free_mmio() - Free a memory-mapped I/O range. | |
2423 | * @start: Base address of region to release. | |
2424 | * @size: Size of the range to be allocated | |
2425 | * | |
2426 | * This function releases anything requested by | |
2427 | * vmbus_mmio_allocate(). | |
2428 | */ | |
2429 | void vmbus_free_mmio(resource_size_t start, resource_size_t size) | |
2430 | { | |
be000f93 JO |
2431 | struct resource *iter; |
2432 | ||
8aea7f82 | 2433 | mutex_lock(&hyperv_mmio_lock); |
be000f93 JO |
2434 | for (iter = hyperv_mmio; iter; iter = iter->sibling) { |
2435 | if ((iter->start >= start + size) || (iter->end <= start)) | |
2436 | continue; | |
2437 | ||
2438 | __release_region(iter, start, size); | |
2439 | } | |
97fb77dc | 2440 | release_mem_region(start, size); |
8aea7f82 | 2441 | mutex_unlock(&hyperv_mmio_lock); |
97fb77dc JO |
2442 | |
2443 | } | |
2444 | EXPORT_SYMBOL_GPL(vmbus_free_mmio); | |
2445 | ||
b0069f43 S |
2446 | static int vmbus_acpi_add(struct acpi_device *device) |
2447 | { | |
2448 | acpi_status result; | |
90f34535 | 2449 | int ret_val = -ENODEV; |
7f163a6f | 2450 | struct acpi_device *ancestor; |
b0069f43 | 2451 | |
607c1a11 S |
2452 | hv_acpi_dev = device; |
2453 | ||
37200078 MK |
2454 | /* |
2455 | * Older versions of Hyper-V for ARM64 fail to include the _CCA | |
2456 | * method on the top level VMbus device in the DSDT. But devices | |
2457 | * are hardware coherent in all current Hyper-V use cases, so fix | |
2458 | * up the ACPI device to behave as if _CCA is present and indicates | |
2459 | * hardware coherence. | |
2460 | */ | |
2461 | ACPI_COMPANION_SET(&device->dev, device); | |
2462 | if (IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) && | |
2463 | device_get_dma_attr(&device->dev) == DEV_DMA_NOT_SUPPORTED) { | |
2464 | pr_info("No ACPI _CCA found; assuming coherent device I/O\n"); | |
2465 | device->flags.cca_seen = true; | |
2466 | device->flags.coherent_dma = true; | |
2467 | } | |
2468 | ||
0a4425b6 | 2469 | result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, |
90f34535 | 2470 | vmbus_walk_resources, NULL); |
b0069f43 | 2471 | |
90f34535 S |
2472 | if (ACPI_FAILURE(result)) |
2473 | goto acpi_walk_err; | |
2474 | /* | |
7f163a6f JO |
2475 | * Some ancestor of the vmbus acpi device (Gen1 or Gen2 |
2476 | * firmware) is the VMOD that has the mmio ranges. Get that. | |
90f34535 | 2477 | */ |
7f163a6f JO |
2478 | for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { |
2479 | result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, | |
2480 | vmbus_walk_resources, NULL); | |
90f34535 S |
2481 | |
2482 | if (ACPI_FAILURE(result)) | |
7f163a6f | 2483 | continue; |
6d146aef JO |
2484 | if (hyperv_mmio) { |
2485 | vmbus_reserve_fb(); | |
7f163a6f | 2486 | break; |
6d146aef | 2487 | } |
b0069f43 | 2488 | } |
90f34535 S |
2489 | ret_val = 0; |
2490 | ||
2491 | acpi_walk_err: | |
b0069f43 | 2492 | complete(&probe_event); |
7f163a6f JO |
2493 | if (ret_val) |
2494 | vmbus_acpi_remove(device); | |
90f34535 | 2495 | return ret_val; |
b0069f43 S |
2496 | } |
2497 | ||
83b50f83 | 2498 | #ifdef CONFIG_PM_SLEEP |
f53335e3 DC |
2499 | static int vmbus_bus_suspend(struct device *dev) |
2500 | { | |
b307b389 | 2501 | struct vmbus_channel *channel, *sc; |
1f48dcf1 DC |
2502 | |
2503 | while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { | |
2504 | /* | |
2505 | * We wait here until the completion of any channel | |
2506 | * offers that are currently in progress. | |
2507 | */ | |
14c685d9 | 2508 | usleep_range(1000, 2000); |
1f48dcf1 DC |
2509 | } |
2510 | ||
2511 | mutex_lock(&vmbus_connection.channel_mutex); | |
2512 | list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { | |
2513 | if (!is_hvsock_channel(channel)) | |
2514 | continue; | |
2515 | ||
2516 | vmbus_force_channel_rescinded(channel); | |
2517 | } | |
2518 | mutex_unlock(&vmbus_connection.channel_mutex); | |
2519 | ||
b307b389 DC |
2520 | /* |
2521 | * Wait until all the sub-channels and hv_sock channels have been | |
2522 | * cleaned up. Sub-channels should be destroyed upon suspend, otherwise | |
2523 | * they would conflict with the new sub-channels that will be created | |
2524 | * in the resume path. hv_sock channels should also be destroyed, but | |
2525 | * a hv_sock channel of an established hv_sock connection can not be | |
2526 | * really destroyed since it may still be referenced by the userspace | |
2527 | * application, so we just force the hv_sock channel to be rescinded | |
2528 | * by vmbus_force_channel_rescinded(), and the userspace application | |
2529 | * will thoroughly destroy the channel after hibernation. | |
2530 | * | |
2531 | * Note: the counter nr_chan_close_on_suspend may never go above 0 if | |
2532 | * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM. | |
2533 | */ | |
2534 | if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0) | |
2535 | wait_for_completion(&vmbus_connection.ready_for_suspend_event); | |
2536 | ||
19873eec DC |
2537 | if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) { |
2538 | pr_err("Can not suspend due to a previous failed resuming\n"); | |
2539 | return -EBUSY; | |
2540 | } | |
d8bd2d44 | 2541 | |
b307b389 DC |
2542 | mutex_lock(&vmbus_connection.channel_mutex); |
2543 | ||
2544 | list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { | |
d8bd2d44 | 2545 | /* |
8b6a877c APM |
2546 | * Remove the channel from the array of channels and invalidate |
2547 | * the channel's relid. Upon resume, vmbus_onoffer() will fix | |
2548 | * up the relid (and other fields, if necessary) and add the | |
2549 | * channel back to the array. | |
d8bd2d44 | 2550 | */ |
8b6a877c | 2551 | vmbus_channel_unmap_relid(channel); |
d8bd2d44 DC |
2552 | channel->offermsg.child_relid = INVALID_RELID; |
2553 | ||
b307b389 DC |
2554 | if (is_hvsock_channel(channel)) { |
2555 | if (!channel->rescind) { | |
2556 | pr_err("hv_sock channel not rescinded!\n"); | |
2557 | WARN_ON_ONCE(1); | |
2558 | } | |
2559 | continue; | |
2560 | } | |
2561 | ||
b307b389 DC |
2562 | list_for_each_entry(sc, &channel->sc_list, sc_list) { |
2563 | pr_err("Sub-channel not deleted!\n"); | |
2564 | WARN_ON_ONCE(1); | |
2565 | } | |
d8bd2d44 DC |
2566 | |
2567 | atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume); | |
b307b389 DC |
2568 | } |
2569 | ||
2570 | mutex_unlock(&vmbus_connection.channel_mutex); | |
2571 | ||
f53335e3 DC |
2572 | vmbus_initiate_unload(false); |
2573 | ||
d8bd2d44 DC |
2574 | /* Reset the event for the next resume. */ |
2575 | reinit_completion(&vmbus_connection.ready_for_resume_event); | |
2576 | ||
f53335e3 DC |
2577 | return 0; |
2578 | } | |
2579 | ||
2580 | static int vmbus_bus_resume(struct device *dev) | |
2581 | { | |
2582 | struct vmbus_channel_msginfo *msginfo; | |
2583 | size_t msgsize; | |
2584 | int ret; | |
2585 | ||
2586 | /* | |
2587 | * We only use the 'vmbus_proto_version', which was in use before | |
2588 | * hibernation, to re-negotiate with the host. | |
2589 | */ | |
bedc61a9 | 2590 | if (!vmbus_proto_version) { |
f53335e3 DC |
2591 | pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version); |
2592 | return -EINVAL; | |
2593 | } | |
2594 | ||
2595 | msgsize = sizeof(*msginfo) + | |
2596 | sizeof(struct vmbus_channel_initiate_contact); | |
2597 | ||
2598 | msginfo = kzalloc(msgsize, GFP_KERNEL); | |
2599 | ||
2600 | if (msginfo == NULL) | |
2601 | return -ENOMEM; | |
2602 | ||
2603 | ret = vmbus_negotiate_version(msginfo, vmbus_proto_version); | |
2604 | ||
2605 | kfree(msginfo); | |
2606 | ||
2607 | if (ret != 0) | |
2608 | return ret; | |
2609 | ||
d8bd2d44 DC |
2610 | WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0); |
2611 | ||
f53335e3 DC |
2612 | vmbus_request_offers(); |
2613 | ||
19873eec DC |
2614 | if (wait_for_completion_timeout( |
2615 | &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0) | |
2616 | pr_err("Some vmbus device is missing after suspending?\n"); | |
d8bd2d44 | 2617 | |
b307b389 DC |
2618 | /* Reset the event for the next suspend. */ |
2619 | reinit_completion(&vmbus_connection.ready_for_suspend_event); | |
2620 | ||
f53335e3 DC |
2621 | return 0; |
2622 | } | |
1a06d017 DC |
2623 | #else |
2624 | #define vmbus_bus_suspend NULL | |
2625 | #define vmbus_bus_resume NULL | |
83b50f83 | 2626 | #endif /* CONFIG_PM_SLEEP */ |
f53335e3 | 2627 | |
b0069f43 S |
2628 | static const struct acpi_device_id vmbus_acpi_device_ids[] = { |
2629 | {"VMBUS", 0}, | |
9d7b18d1 | 2630 | {"VMBus", 0}, |
b0069f43 S |
2631 | {"", 0}, |
2632 | }; | |
2633 | MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids); | |
2634 | ||
f53335e3 | 2635 | /* |
1a06d017 DC |
2636 | * Note: we must use the "noirq" ops, otherwise hibernation can not work with |
2637 | * PCI device assignment, because "pci_dev_pm_ops" uses the "noirq" ops: in | |
2638 | * the resume path, the pci "noirq" restore op runs before "non-noirq" op (see | |
f53335e3 DC |
2639 | * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() -> |
2640 | * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's | |
1a06d017 DC |
2641 | * resume callback must also run via the "noirq" ops. |
2642 | * | |
2643 | * Set suspend_noirq/resume_noirq to NULL for Suspend-to-Idle: see the comment | |
2644 | * earlier in this file before vmbus_pm. | |
f53335e3 | 2645 | */ |
1a06d017 | 2646 | |
f53335e3 | 2647 | static const struct dev_pm_ops vmbus_bus_pm = { |
1a06d017 DC |
2648 | .suspend_noirq = NULL, |
2649 | .resume_noirq = NULL, | |
2650 | .freeze_noirq = vmbus_bus_suspend, | |
2651 | .thaw_noirq = vmbus_bus_resume, | |
2652 | .poweroff_noirq = vmbus_bus_suspend, | |
2653 | .restore_noirq = vmbus_bus_resume | |
f53335e3 DC |
2654 | }; |
2655 | ||
b0069f43 S |
2656 | static struct acpi_driver vmbus_acpi_driver = { |
2657 | .name = "vmbus", | |
2658 | .ids = vmbus_acpi_device_ids, | |
2659 | .ops = { | |
2660 | .add = vmbus_acpi_add, | |
e4ecb41c | 2661 | .remove = vmbus_acpi_remove, |
b0069f43 | 2662 | }, |
f53335e3 | 2663 | .drv.pm = &vmbus_bus_pm, |
b0069f43 S |
2664 | }; |
2665 | ||
2517281d VK |
2666 | static void hv_kexec_handler(void) |
2667 | { | |
fd1fea68 | 2668 | hv_stimer_global_cleanup(); |
75ff3a8a | 2669 | vmbus_initiate_unload(false); |
523b9408 VK |
2670 | /* Make sure conn_state is set as hv_synic_cleanup checks for it */ |
2671 | mb(); | |
76d36ab7 | 2672 | cpuhp_remove_state(hyperv_cpuhp_online); |
2517281d VK |
2673 | }; |
2674 | ||
b4370df2 VK |
2675 | static void hv_crash_handler(struct pt_regs *regs) |
2676 | { | |
fd1fea68 MK |
2677 | int cpu; |
2678 | ||
75ff3a8a | 2679 | vmbus_initiate_unload(true); |
b4370df2 VK |
2680 | /* |
2681 | * In crash handler we can't schedule synic cleanup for all CPUs, | |
2682 | * doing the cleanup for current CPU only. This should be sufficient | |
2683 | * for kdump. | |
2684 | */ | |
fd1fea68 MK |
2685 | cpu = smp_processor_id(); |
2686 | hv_stimer_cleanup(cpu); | |
7a1323b5 | 2687 | hv_synic_disable_regs(cpu); |
b4370df2 VK |
2688 | }; |
2689 | ||
63ecc6d2 DC |
static int hv_synic_suspend(void)
{
	/*
	 * All non-boot CPUs are already offline when we get here. In a
	 * legacy configuration (stimer Direct Mode not enabled) their
	 * stimers were unbound on the way down, via hv_synic_cleanup() ->
	 * hv_stimer_legacy_cleanup() -> hv_stimer_cleanup() ->
	 * clockevents_unbind_device().
	 *
	 * This callback runs on CPU0 only, with interrupts disabled.
	 * hv_stimer_legacy_cleanup() is intentionally not called for CPU0:
	 * 1) there is no need, because interrupts stay disabled between
	 *    syscore_suspend() and syscore_resume(): see create_image()
	 *    and resume_target_kernel()
	 * 2) CPU0's stimer gets disabled automatically later on by
	 *    syscore_suspend() -> timekeeping_suspend() -> tick_suspend()
	 *    -> ... -> clockevents_shutdown() -> ... -> hv_ce_shutdown()
	 * 3) clockevents_unbind_device() may sleep, so calling it in an
	 *    interrupts-disabled context would trigger a warning.
	 */
	hv_synic_disable_regs(0);

	return 0;
}
2716 | ||
static void hv_synic_resume(void)
{
	/*
	 * Only the synic registers need restoring. hv_stimer_init(0) is not
	 * required: hv_synic_suspend() never unbinds the timer on CPU0, and
	 * timekeeping_resume() re-enables it automatically.
	 */
	hv_synic_enable_regs(0);
}
2727 | ||
2728 | /* The callbacks run only on CPU0, with irqs_disabled. */ | |
2729 | static struct syscore_ops hv_synic_syscore_ops = { | |
2730 | .suspend = hv_synic_suspend, | |
2731 | .resume = hv_synic_resume, | |
2732 | }; | |
2733 | ||
607c1a11 | 2734 | static int __init hv_acpi_init(void) |
1168ac22 | 2735 | { |
2dda95f8 | 2736 | int ret, t; |
b0069f43 | 2737 | |
4a5f3cde | 2738 | if (!hv_is_hyperv_initialized()) |
0592969e JW |
2739 | return -ENODEV; |
2740 | ||
7e279d78 WL |
2741 | if (hv_root_partition) |
2742 | return 0; | |
2743 | ||
b0069f43 S |
2744 | init_completion(&probe_event); |
2745 | ||
2746 | /* | |
efc26722 | 2747 | * Get ACPI resources first. |
b0069f43 | 2748 | */ |
0246604c S |
2749 | ret = acpi_bus_register_driver(&vmbus_acpi_driver); |
2750 | ||
b0069f43 S |
2751 | if (ret) |
2752 | return ret; | |
2753 | ||
2dda95f8 S |
2754 | t = wait_for_completion_timeout(&probe_event, 5*HZ); |
2755 | if (t == 0) { | |
2756 | ret = -ETIMEDOUT; | |
2757 | goto cleanup; | |
2758 | } | |
d608715d MK |
2759 | |
2760 | /* | |
2761 | * If we're on an architecture with a hardcoded hypervisor | |
2762 | * vector (i.e. x86/x64), override the VMbus interrupt found | |
2763 | * in the ACPI tables. Ensure vmbus_irq is not set since the | |
2764 | * normal Linux IRQ mechanism is not used in this case. | |
2765 | */ | |
2766 | #ifdef HYPERVISOR_CALLBACK_VECTOR | |
2767 | vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR; | |
2768 | vmbus_irq = -1; | |
2769 | #endif | |
2770 | ||
af9ca6f9 | 2771 | hv_debug_init(); |
b0069f43 | 2772 | |
efc26722 | 2773 | ret = vmbus_bus_init(); |
91fd799e | 2774 | if (ret) |
2dda95f8 S |
2775 | goto cleanup; |
2776 | ||
2517281d | 2777 | hv_setup_kexec_handler(hv_kexec_handler); |
b4370df2 | 2778 | hv_setup_crash_handler(hv_crash_handler); |
2517281d | 2779 | |
63ecc6d2 DC |
2780 | register_syscore_ops(&hv_synic_syscore_ops); |
2781 | ||
2dda95f8 S |
2782 | return 0; |
2783 | ||
2784 | cleanup: | |
2785 | acpi_bus_unregister_driver(&vmbus_acpi_driver); | |
cf6a2eac | 2786 | hv_acpi_dev = NULL; |
91fd799e | 2787 | return ret; |
1168ac22 S |
2788 | } |
2789 | ||
93e5bd06 S |
2790 | static void __exit vmbus_exit(void) |
2791 | { | |
e72e7ac5 VK |
2792 | int cpu; |
2793 | ||
63ecc6d2 DC |
2794 | unregister_syscore_ops(&hv_synic_syscore_ops); |
2795 | ||
2517281d | 2796 | hv_remove_kexec_handler(); |
b4370df2 | 2797 | hv_remove_crash_handler(); |
09a19628 | 2798 | vmbus_connection.conn_state = DISCONNECTED; |
fd1fea68 | 2799 | hv_stimer_global_cleanup(); |
2db84eff | 2800 | vmbus_disconnect(); |
d608715d MK |
2801 | if (vmbus_irq == -1) { |
2802 | hv_remove_vmbus_handler(); | |
2803 | } else { | |
2804 | free_percpu_irq(vmbus_irq, vmbus_evt); | |
2805 | free_percpu(vmbus_evt); | |
2806 | } | |
37cdd991 SH |
2807 | for_each_online_cpu(cpu) { |
2808 | struct hv_per_cpu_context *hv_cpu | |
2809 | = per_cpu_ptr(hv_context.cpu_context, cpu); | |
2810 | ||
2811 | tasklet_kill(&hv_cpu->msg_dpc); | |
2812 | } | |
af9ca6f9 BB |
2813 | hv_debug_rm_all_dir(); |
2814 | ||
93e5bd06 | 2815 | vmbus_free_channels(); |
8b6a877c | 2816 | kfree(vmbus_connection.channels); |
37cdd991 | 2817 | |
cc2dd402 | 2818 | if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { |
81b18bce | 2819 | kmsg_dump_unregister(&hv_kmsg_dumper); |
510f7aef | 2820 | unregister_die_notifier(&hyperv_die_block); |
096c605f | 2821 | } |
81b18bce | 2822 | |
792f232d GP |
2823 | /* |
2824 | * The panic notifier is always registered, hence we should | |
2825 | * also unconditionally unregister it here as well. | |
2826 | */ | |
2827 | atomic_notifier_chain_unregister(&panic_notifier_list, | |
2828 | &hyperv_panic_block); | |
2829 | ||
81b18bce | 2830 | free_page((unsigned long)hv_panic_page); |
8afc06dd SM |
2831 | unregister_sysctl_table(hv_ctl_table_hdr); |
2832 | hv_ctl_table_hdr = NULL; | |
93e5bd06 | 2833 | bus_unregister(&hv_bus); |
37cdd991 | 2834 | |
76d36ab7 | 2835 | cpuhp_remove_state(hyperv_cpuhp_online); |
06210b42 | 2836 | hv_synic_free(); |
93e5bd06 S |
2837 | acpi_bus_unregister_driver(&vmbus_acpi_driver); |
2838 | } | |
2839 | ||
1168ac22 | 2840 | |
90c9960e | 2841 | MODULE_LICENSE("GPL"); |
674eecb3 | 2842 | MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver"); |
3e7ee490 | 2843 | |
43d4e119 | 2844 | subsys_initcall(hv_acpi_init); |
93e5bd06 | 2845 | module_exit(vmbus_exit); |