Commit | Line | Data |
---|---|---|
23200b7a JM |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright © 2019 Oracle and/or its affiliates. All rights reserved. | |
4 | * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. | |
5 | * | |
6 | * KVM Xen emulation | |
7 | */ | |
8d20bd63 | 8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
23200b7a JM |
9 | |
10 | #include "x86.h" | |
11 | #include "xen.h" | |
79033beb | 12 | #include "hyperv.h" |
66e3cf72 | 13 | #include "irq.h" |
23200b7a | 14 | |
2fd6df2f | 15 | #include <linux/eventfd.h> |
23200b7a | 16 | #include <linux/kvm_host.h> |
30b5c851 | 17 | #include <linux/sched/stat.h> |
23200b7a JM |
18 | |
19 | #include <trace/events/kvm.h> | |
13ffb97a | 20 | #include <xen/interface/xen.h> |
30b5c851 | 21 | #include <xen/interface/vcpu.h> |
28d1629f | 22 | #include <xen/interface/version.h> |
14243b38 | 23 | #include <xen/interface/event_channel.h> |
0ec6c5c5 | 24 | #include <xen/interface/sched.h> |
23200b7a | 25 | |
f422f853 | 26 | #include <asm/xen/cpuid.h> |
451a7078 | 27 | #include <asm/pvclock.h> |
f422f853 PD |
28 | |
29 | #include "cpuid.h" | |
23200b7a JM |
30 | #include "trace.h" |
31 | ||
/*
 * Forward declarations of static helpers that are referenced in this file
 * ahead of their definitions: kvm_xen_set_evtchn() is called from
 * kvm_xen_inject_timer_irqs() and kvm_xen_setattr_evtchn() from
 * kvm_xen_hvm_set_attr().
 */
53639526 | 32 | static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm); |
2fd6df2f JM |
33 | static int kvm_xen_setattr_evtchn(struct kvm *kvm, struct kvm_xen_hvm_attr *data); |
34 | static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r); | |
35 | ||
7d6bbebb DW |
/*
 * Static key defined through the "deferred" variant, initially false, with
 * HZ passed as the second macro argument.
 * NOTE(review): presumably switched on once Xen emulation is configured for
 * a VM -- its users are not visible in this part of the file; confirm.
 */
36 | DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ); |
37 | ||
c01c55a3 | 38 | static int kvm_xen_shared_info_init(struct kvm *kvm) |
13ffb97a | 39 | { |
1cfc9c4b | 40 | struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; |
55749769 | 41 | struct pvclock_wall_clock *wc; |
55749769 DW |
42 | u32 *wc_sec_hi; |
43 | u32 wc_version; | |
44 | u64 wall_nsec; | |
319afe68 | 45 | int ret = 0; |
13ffb97a JM |
46 | int idx = srcu_read_lock(&kvm->srcu); |
47 | ||
c01c55a3 PD |
48 | read_lock_irq(&gpc->lock); |
49 | while (!kvm_gpc_check(gpc, PAGE_SIZE)) { | |
50 | read_unlock_irq(&gpc->lock); | |
1cfc9c4b | 51 | |
c01c55a3 | 52 | ret = kvm_gpc_refresh(gpc, PAGE_SIZE); |
55749769 DW |
53 | if (ret) |
54 | goto out; | |
55 | ||
55749769 | 56 | read_lock_irq(&gpc->lock); |
c01c55a3 | 57 | } |
55749769 | 58 | |
c01c55a3 PD |
59 | /* |
60 | * This code mirrors kvm_write_wall_clock() except that it writes | |
61 | * directly through the pfn cache and doesn't mark the page dirty. | |
62 | */ | |
63 | wall_nsec = kvm_get_wall_clock_epoch(kvm); | |
629b5348 JM |
64 | |
65 | /* Paranoia checks on the 32-bit struct layout */ | |
66 | BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900); | |
67 | BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924); | |
68 | BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); | |
69 | ||
629b5348 JM |
70 | #ifdef CONFIG_X86_64 |
71 | /* Paranoia checks on the 64-bit struct layout */ | |
72 | BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00); | |
73 | BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c); | |
74 | ||
55749769 DW |
75 | if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { |
76 | struct shared_info *shinfo = gpc->khva; | |
77 | ||
78 | wc_sec_hi = &shinfo->wc_sec_hi; | |
79 | wc = &shinfo->wc; | |
80 | } else | |
629b5348 | 81 | #endif |
55749769 DW |
82 | { |
83 | struct compat_shared_info *shinfo = gpc->khva; | |
84 | ||
85 | wc_sec_hi = &shinfo->arch.wc_sec_hi; | |
86 | wc = &shinfo->wc; | |
87 | } | |
88 | ||
89 | /* Increment and ensure an odd value */ | |
90 | wc_version = wc->version = (wc->version + 1) | 1; | |
91 | smp_wmb(); | |
92 | ||
5d6d6a7d | 93 | wc->nsec = do_div(wall_nsec, NSEC_PER_SEC); |
55749769 DW |
94 | wc->sec = (u32)wall_nsec; |
95 | *wc_sec_hi = wall_nsec >> 32; | |
96 | smp_wmb(); | |
97 | ||
98 | wc->version = wc_version + 1; | |
99 | read_unlock_irq(&gpc->lock); | |
629b5348 | 100 | |
629b5348 | 101 | kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE); |
13ffb97a | 102 | |
629b5348 | 103 | out: |
13ffb97a JM |
104 | srcu_read_unlock(&kvm->srcu, idx); |
105 | return ret; | |
106 | } | |
107 | ||
53639526 JM |
108 | void kvm_xen_inject_timer_irqs(struct kvm_vcpu *vcpu) |
109 | { | |
110 | if (atomic_read(&vcpu->arch.xen.timer_pending) > 0) { | |
111 | struct kvm_xen_evtchn e; | |
112 | ||
113 | e.vcpu_id = vcpu->vcpu_id; | |
114 | e.vcpu_idx = vcpu->vcpu_idx; | |
115 | e.port = vcpu->arch.xen.timer_virq; | |
116 | e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; | |
117 | ||
118 | kvm_xen_set_evtchn(&e, vcpu->kvm); | |
119 | ||
120 | vcpu->arch.xen.timer_expires = 0; | |
121 | atomic_set(&vcpu->arch.xen.timer_pending, 0); | |
122 | } | |
123 | } | |
124 | ||
125 | static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer) | |
126 | { | |
127 | struct kvm_vcpu *vcpu = container_of(timer, struct kvm_vcpu, | |
128 | arch.xen.timer); | |
77c9b9de DW |
129 | struct kvm_xen_evtchn e; |
130 | int rc; | |
131 | ||
53639526 JM |
132 | if (atomic_read(&vcpu->arch.xen.timer_pending)) |
133 | return HRTIMER_NORESTART; | |
134 | ||
77c9b9de DW |
135 | e.vcpu_id = vcpu->vcpu_id; |
136 | e.vcpu_idx = vcpu->vcpu_idx; | |
137 | e.port = vcpu->arch.xen.timer_virq; | |
138 | e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; | |
139 | ||
140 | rc = kvm_xen_set_evtchn_fast(&e, vcpu->kvm); | |
141 | if (rc != -EWOULDBLOCK) { | |
142 | vcpu->arch.xen.timer_expires = 0; | |
143 | return HRTIMER_NORESTART; | |
144 | } | |
145 | ||
53639526 JM |
146 | atomic_inc(&vcpu->arch.xen.timer_pending); |
147 | kvm_make_request(KVM_REQ_UNBLOCK, vcpu); | |
148 | kvm_vcpu_kick(vcpu); | |
149 | ||
150 | return HRTIMER_NORESTART; | |
151 | } | |
152 | ||
451a7078 DW |
153 | static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, |
154 | bool linux_wa) | |
53639526 | 155 | { |
451a7078 DW |
156 | int64_t kernel_now, delta; |
157 | uint64_t guest_now; | |
158 | ||
159 | /* | |
160 | * The guest provides the requested timeout in absolute nanoseconds | |
161 | * of the KVM clock — as *it* sees it, based on the scaled TSC and | |
162 | * the pvclock information provided by KVM. | |
163 | * | |
164 | * The kernel doesn't support hrtimers based on CLOCK_MONOTONIC_RAW | |
165 | * so use CLOCK_MONOTONIC. In the timescales covered by timers, the | |
166 | * difference won't matter much as there is no cumulative effect. | |
167 | * | |
168 | * Calculate the time for some arbitrary point in time around "now" | |
169 | * in terms of both kvmclock and CLOCK_MONOTONIC. Calculate the | |
170 | * delta between the kvmclock "now" value and the guest's requested | |
171 | * timeout, apply the "Linux workaround" described below, and add | |
172 | * the resulting delta to the CLOCK_MONOTONIC "now" value, to get | |
173 | * the absolute CLOCK_MONOTONIC time at which the timer should | |
174 | * fire. | |
175 | */ | |
176 | if (vcpu->arch.hv_clock.version && vcpu->kvm->arch.use_master_clock && | |
177 | static_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | |
178 | uint64_t host_tsc, guest_tsc; | |
179 | ||
180 | if (!IS_ENABLED(CONFIG_64BIT) || | |
181 | !kvm_get_monotonic_and_clockread(&kernel_now, &host_tsc)) { | |
182 | /* | |
183 | * Don't fall back to get_kvmclock_ns() because it's | |
184 | * broken; it has a systemic error in its results | |
185 | * because it scales directly from host TSC to | |
186 | * nanoseconds, and doesn't scale first to guest TSC | |
187 | * and *then* to nanoseconds as the guest does. | |
188 | * | |
189 | * There is a small error introduced here because time | |
190 | * continues to elapse between the ktime_get() and the | |
191 | * subsequent rdtsc(). But not the systemic drift due | |
192 | * to get_kvmclock_ns(). | |
193 | */ | |
194 | kernel_now = ktime_get(); /* This is CLOCK_MONOTONIC */ | |
195 | host_tsc = rdtsc(); | |
196 | } | |
197 | ||
198 | /* Calculate the guest kvmclock as the guest would do it. */ | |
199 | guest_tsc = kvm_read_l1_tsc(vcpu, host_tsc); | |
200 | guest_now = __pvclock_read_cycles(&vcpu->arch.hv_clock, | |
201 | guest_tsc); | |
202 | } else { | |
203 | /* | |
204 | * Without CONSTANT_TSC, get_kvmclock_ns() is the only option. | |
205 | * | |
206 | * Also if the guest PV clock hasn't been set up yet, as is | |
207 | * likely to be the case during migration when the vCPU has | |
208 | * not been run yet. It would be possible to calculate the | |
209 | * scaling factors properly in that case but there's not much | |
210 | * point in doing so. The get_kvmclock_ns() drift accumulates | |
211 | * over time, so it's OK to use it at startup. Besides, on | |
212 | * migration there's going to be a little bit of skew in the | |
213 | * precise moment at which timers fire anyway. Often they'll | |
214 | * be in the "past" by the time the VM is running again after | |
215 | * migration. | |
216 | */ | |
217 | guest_now = get_kvmclock_ns(vcpu->kvm); | |
218 | kernel_now = ktime_get(); | |
219 | } | |
220 | ||
221 | delta = guest_abs - guest_now; | |
222 | ||
223 | /* | |
224 | * Xen has a 'Linux workaround' in do_set_timer_op() which checks for | |
225 | * negative absolute timeout values (caused by integer overflow), and | |
226 | * for values about 13 days in the future (2^50ns) which would be | |
227 | * caused by jiffies overflow. For those cases, Xen sets the timeout | |
228 | * 100ms in the future (not *too* soon, since if a guest really did | |
229 | * set a long timeout on purpose we don't want to keep churning CPU | |
230 | * time by waking it up). Emulate Xen's workaround when starting the | |
231 | * timer in response to __HYPERVISOR_set_timer_op. | |
232 | */ | |
233 | if (linux_wa && | |
234 | unlikely((int64_t)guest_abs < 0 || | |
235 | (delta > 0 && (uint32_t) (delta >> 50) != 0))) { | |
236 | delta = 100 * NSEC_PER_MSEC; | |
237 | guest_abs = guest_now + delta; | |
238 | } | |
239 | ||
77c9b9de DW |
240 | /* |
241 | * Avoid races with the old timer firing. Checking timer_expires | |
242 | * to avoid calling hrtimer_cancel() will only have false positives | |
243 | * so is fine. | |
244 | */ | |
245 | if (vcpu->arch.xen.timer_expires) | |
246 | hrtimer_cancel(&vcpu->arch.xen.timer); | |
247 | ||
53639526 JM |
248 | atomic_set(&vcpu->arch.xen.timer_pending, 0); |
249 | vcpu->arch.xen.timer_expires = guest_abs; | |
250 | ||
451a7078 | 251 | if (delta <= 0) |
53639526 | 252 | xen_timer_callback(&vcpu->arch.xen.timer); |
451a7078 | 253 | else |
53639526 | 254 | hrtimer_start(&vcpu->arch.xen.timer, |
451a7078 | 255 | ktime_add_ns(kernel_now, delta), |
53639526 | 256 | HRTIMER_MODE_ABS_HARD); |
53639526 JM |
257 | } |
258 | ||
259 | static void kvm_xen_stop_timer(struct kvm_vcpu *vcpu) | |
260 | { | |
261 | hrtimer_cancel(&vcpu->arch.xen.timer); | |
262 | vcpu->arch.xen.timer_expires = 0; | |
263 | atomic_set(&vcpu->arch.xen.timer_pending, 0); | |
264 | } | |
265 | ||
266 | static void kvm_xen_init_timer(struct kvm_vcpu *vcpu) | |
267 | { | |
268 | hrtimer_init(&vcpu->arch.xen.timer, CLOCK_MONOTONIC, | |
269 | HRTIMER_MODE_ABS_HARD); | |
270 | vcpu->arch.xen.timer.function = xen_timer_callback; | |
271 | } | |
272 | ||
/*
 * Copy the vCPU's runstate (vx->current_runstate, vx->runstate_entry_time
 * and vx->runstate_times) into the guest's runstate area through the
 * runstate pfn cache, plus the second cache (runstate2_cache) when the
 * structure straddles a page boundary.
 *
 * @v:      vCPU whose runstate is published.
 * @atomic: when true the function must not sleep; locks are only tried,
 *          and if a lock is contended or a cache fails its check the
 *          function returns without writing anything.
 *
 * The 64-bit or compat layout is chosen from kvm->arch.xen.long_mode. When
 * kvm->arch.xen.runstate_update_flag is set, XEN_RUNSTATE_UPDATE is set in
 * the top byte of state_entry_time before the body is written and cleared
 * again afterwards, with write barriers in between. Failures to refresh or
 * activate a cache are not reported to the caller.
 */
5ec3289b | 273 | static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) |
30b5c851 DW |
274 | { |
275 | struct kvm_vcpu_xen *vx = &v->arch.xen; | |
5ec3289b DW |
276 | struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache; |
277 | struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache; | |
278 | size_t user_len, user_len1, user_len2; | |
279 | struct vcpu_runstate_info rs; | |
a795cd43 | 280 | unsigned long flags; |
5ec3289b | 281 | size_t times_ofs; |
d8ba8ba4 DW |
282 | uint8_t *update_bit = NULL; |
283 | uint64_t entry_time; | |
5ec3289b DW |
284 | uint64_t *rs_times; |
285 | int *rs_state; | |
30b5c851 | 286 | |
30b5c851 | 287 | /* |
fcb732d8 | 288 | * The only difference between 32-bit and 64-bit versions of the |
5ec3289b | 289 | * runstate struct is the alignment of uint64_t in 32-bit, which |
fcb732d8 | 290 | * means that the 64-bit version has an additional 4 bytes of |
5ec3289b DW |
291 | * padding after the first field 'state'. Let's be really really |
292 | * paranoid about that, and matching it with our internal data | |
293 | * structures that we memcpy into it... | |
30b5c851 | 294 | */ |
fcb732d8 DW |
295 | BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0); |
296 | BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0); | |
fcb732d8 | 297 | BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); |
fcb732d8 | 298 | #ifdef CONFIG_X86_64 |
5ec3289b DW |
299 | /* |
300 | * The 64-bit structure has 4 bytes of padding before 'state_entry_time' | |
301 | * so each subsequent field is shifted by 4, and it's 4 bytes longer. | |
302 | */ | |
30b5c851 DW |
303 | BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != |
304 | offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4); | |
305 | BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) != | |
306 | offsetof(struct compat_vcpu_runstate_info, time) + 4); | |
5ec3289b | 307 | BUILD_BUG_ON(sizeof(struct vcpu_runstate_info) != 0x2c + 4); |
30b5c851 DW |
308 | #endif |
309 | /* | |
5ec3289b DW |
310 | * The state field is in the same place at the start of both structs, |
311 | * and is the same size (int) as vx->current_runstate. | |
30b5c851 DW |
312 | */ |
313 | BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != | |
314 | offsetof(struct compat_vcpu_runstate_info, state)); | |
6a834754 | 315 | BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) != |
30b5c851 | 316 | sizeof(vx->current_runstate)); |
6a834754 | 317 | BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) != |
30b5c851 DW |
318 | sizeof(vx->current_runstate)); |
319 | ||
5ec3289b DW |
320 | /* |
321 | * The state_entry_time field is 64 bits in both versions, and the | |
322 | * XEN_RUNSTATE_UPDATE flag is in the top bit, which given that x86 | |
323 | * is little-endian means that it's in the last *byte* of the word. | |
324 | * That detail is important later. | |
325 | */ | |
326 | BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) != | |
327 | sizeof(uint64_t)); | |
328 | BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != | |
329 | sizeof(uint64_t)); | |
330 | BUILD_BUG_ON((XEN_RUNSTATE_UPDATE >> 56) != 0x80); | |
30b5c851 DW |
331 | |
332 | /* | |
5ec3289b DW |
333 | * The time array is four 64-bit quantities in both versions, matching |
334 | * the vx->runstate_times and immediately following state_entry_time. | |
30b5c851 DW |
335 | */ |
336 | BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != | |
5ec3289b | 337 | offsetof(struct vcpu_runstate_info, time) - sizeof(uint64_t)); |
30b5c851 | 338 | BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) != |
5ec3289b | 339 | offsetof(struct compat_vcpu_runstate_info, time) - sizeof(uint64_t)); |
6a834754 DW |
340 | BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != |
341 | sizeof_field(struct compat_vcpu_runstate_info, time)); | |
342 | BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != | |
30b5c851 DW |
343 | sizeof(vx->runstate_times)); |
344 | ||
/* Pick the guest-visible structure size and the offset of state_entry_time. */
5ec3289b DW |
345 | if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) { |
346 | user_len = sizeof(struct vcpu_runstate_info); | |
347 | times_ofs = offsetof(struct vcpu_runstate_info, | |
348 | state_entry_time); | |
349 | } else { | |
350 | user_len = sizeof(struct compat_vcpu_runstate_info); | |
351 | times_ofs = offsetof(struct compat_vcpu_runstate_info, | |
352 | state_entry_time); | |
353 | } | |
354 | ||
355 | /* | |
356 | * There are basically no alignment constraints. The guest can set it | |
357 | * up so it crosses from one page to the next, and at arbitrary byte | |
358 | * alignment (and the 32-bit ABI doesn't align the 64-bit integers | |
359 | * anyway, even if the overall struct had been 64-bit aligned). | |
360 | */ | |
361 | if ((gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE) { | |
362 | user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK); | |
363 | user_len2 = user_len - user_len1; | |
364 | } else { | |
365 | user_len1 = user_len; | |
366 | user_len2 = 0; | |
367 | } | |
/* The part in the first page plus the part in the second must cover the struct. */
368 | BUG_ON(user_len1 + user_len2 != user_len); | |
369 | ||
370 | retry: | |
371 | /* | |
372 | * Attempt to obtain the GPC lock on *both* (if there are two) | |
373 | * gfn_to_pfn caches that cover the region. | |
374 | */ | |
bbe17c62 DW |
375 | if (atomic) { |
376 | local_irq_save(flags); | |
377 | if (!read_trylock(&gpc1->lock)) { | |
378 | local_irq_restore(flags); | |
379 | return; | |
380 | } | |
381 | } else { | |
382 | read_lock_irqsave(&gpc1->lock, flags); | |
383 | } | |
58f5ee5f | 384 | while (!kvm_gpc_check(gpc1, user_len1)) { |
5ec3289b DW |
385 | read_unlock_irqrestore(&gpc1->lock, flags); |
386 | ||
387 | /* When invoked from kvm_sched_out() we cannot sleep */ | |
388 | if (atomic) | |
389 | return; | |
390 | ||
58f5ee5f | 391 | if (kvm_gpc_refresh(gpc1, user_len1)) |
5ec3289b DW |
392 | return; |
393 | ||
394 | read_lock_irqsave(&gpc1->lock, flags); | |
395 | } | |
396 | ||
397 | if (likely(!user_len2)) { | |
398 | /* | |
399 | * Set up three pointers directly to the runstate_info | |
400 | * struct in the guest (via the GPC). | |
401 | * | |
402 | * • @rs_state → state field | |
403 | * • @rs_times → state_entry_time field. | |
404 | * • @update_bit → last byte of state_entry_time, which | |
405 | * contains the XEN_RUNSTATE_UPDATE bit. | |
406 | */ | |
407 | rs_state = gpc1->khva; | |
408 | rs_times = gpc1->khva + times_ofs; | |
d8ba8ba4 DW |
409 | if (v->kvm->arch.xen.runstate_update_flag) |
410 | update_bit = ((void *)(&rs_times[1])) - 1; | |
5ec3289b DW |
411 | } else { |
412 | /* | |
413 | * The guest's runstate_info is split across two pages and we | |
414 | * need to hold and validate both GPCs simultaneously. We can | |
415 | * declare a lock ordering GPC1 > GPC2 because nothing else | |
23e60258 DW |
416 | * takes them more than one at a time. Set a subclass on the |
417 | * gpc1 lock to make lockdep shut up about it. | |
5ec3289b | 418 | */ |
23e60258 | 419 | lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_); |
bbe17c62 DW |
420 | if (atomic) { |
421 | if (!read_trylock(&gpc2->lock)) { | |
422 | read_unlock_irqrestore(&gpc1->lock, flags); | |
423 | return; | |
424 | } | |
425 | } else { | |
426 | read_lock(&gpc2->lock); | |
427 | } | |
5ec3289b | 428 | |
58f5ee5f | 429 | if (!kvm_gpc_check(gpc2, user_len2)) { |
5ec3289b DW |
430 | read_unlock(&gpc2->lock); |
431 | read_unlock_irqrestore(&gpc1->lock, flags); | |
432 | ||
433 | /* When invoked from kvm_sched_out() we cannot sleep */ | |
434 | if (atomic) | |
435 | return; | |
436 | ||
437 | /* | |
438 | * Use kvm_gpc_activate() here because if the runstate | |
439 | * area was configured in 32-bit mode and only extends | |
440 | * to the second page now because the guest changed to | |
441 | * 64-bit mode, the second GPC won't have been set up. | |
442 | */ | |
8c82a0b3 ML |
443 | if (kvm_gpc_activate(gpc2, gpc1->gpa + user_len1, |
444 | user_len2)) | |
5ec3289b DW |
445 | return; |
446 | ||
447 | /* | |
448 | * We dropped the lock on GPC1 so we have to go all the | |
449 | * way back and revalidate that too. | |
450 | */ | |
451 | goto retry; | |
452 | } | |
453 | ||
454 | /* | |
455 | * In this case, the runstate_info struct will be assembled on | |
456 | * the kernel stack (compat or not as appropriate) and will | |
457 | * be copied to GPC1/GPC2 with a dual memcpy. Set up the three | |
458 | * rs pointers accordingly. | |
459 | */ | |
460 | rs_times = &rs.state_entry_time; | |
461 | ||
462 | /* | |
463 | * The rs_state pointer points to the start of what we'll | |
464 | * copy to the guest, which in the case of a compat guest | |
465 | * is the 32-bit field that the compiler thinks is padding. | |
466 | */ | |
467 | rs_state = ((void *)rs_times) - times_ofs; | |
468 | ||
469 | /* | |
470 | * The update_bit is still directly in the guest memory, | |
471 | * via one GPC or the other. | |
472 | */ | |
d8ba8ba4 DW |
473 | if (v->kvm->arch.xen.runstate_update_flag) { |
474 | if (user_len1 >= times_ofs + sizeof(uint64_t)) | |
475 | update_bit = gpc1->khva + times_ofs + | |
476 | sizeof(uint64_t) - 1; | |
477 | else | |
478 | update_bit = gpc2->khva + times_ofs + | |
479 | sizeof(uint64_t) - 1 - user_len1; | |
480 | } | |
5ec3289b DW |
481 | |
482 | #ifdef CONFIG_X86_64 | |
483 | /* | |
484 | * Don't leak kernel memory through the padding in the 64-bit | |
485 | * version of the struct. | |
486 | */ | |
487 | memset(&rs, 0, offsetof(struct vcpu_runstate_info, state_entry_time)); | |
488 | #endif | |
489 | } | |
490 | ||
491 | /* | |
492 | * First, set the XEN_RUNSTATE_UPDATE bit in the top bit of the | |
493 | * state_entry_time field, directly in the guest. We need to set | |
494 | * that (and write-barrier) before writing to the rest of the | |
495 | * structure, and clear it last. Just as Xen does, we address the | |
496 | * single *byte* in which it resides because it might be in a | |
497 | * different cache line to the rest of the 64-bit word, due to | |
498 | * the (lack of) alignment constraints. | |
499 | */ | |
d8ba8ba4 DW |
500 | entry_time = vx->runstate_entry_time; |
501 | if (update_bit) { | |
502 | entry_time |= XEN_RUNSTATE_UPDATE; | |
503 | *update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56; | |
504 | smp_wmb(); | |
505 | } | |
30b5c851 DW |
506 | |
507 | /* | |
5ec3289b DW |
508 | * Now assemble the actual structure, either on our kernel stack |
509 | * or directly in the guest according to how the rs_state and | |
510 | * rs_times pointers were set up above. | |
30b5c851 | 511 | */ |
5ec3289b | 512 | *rs_state = vx->current_runstate; |
d8ba8ba4 | 513 | rs_times[0] = entry_time; |
5ec3289b DW |
514 | memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times)); |
515 | ||
516 | /* For the split case, we have to then copy it to the guest. */ | |
517 | if (user_len2) { | |
518 | memcpy(gpc1->khva, rs_state, user_len1); | |
519 | memcpy(gpc2->khva, ((void *)rs_state) + user_len1, user_len2); | |
520 | } | |
fcb732d8 DW |
521 | smp_wmb(); |
522 | ||
5ec3289b | 523 | /* Finally, clear the XEN_RUNSTATE_UPDATE bit. */ |
d8ba8ba4 DW |
524 | if (update_bit) { |
525 | entry_time &= ~XEN_RUNSTATE_UPDATE; | |
526 | *update_bit = entry_time >> 56; | |
527 | smp_wmb(); | |
528 | } | |
fcb732d8 | 529 | |
/* Mark the written pages dirty and release the locks, gpc2 before gpc1. */
4438355e | 530 | if (user_len2) { |
78b74638 | 531 | kvm_gpc_mark_dirty_in_slot(gpc2); |
5ec3289b | 532 | read_unlock(&gpc2->lock); |
4438355e | 533 | } |
5ec3289b | 534 | |
78b74638 | 535 | kvm_gpc_mark_dirty_in_slot(gpc1); |
4438355e | 536 | read_unlock_irqrestore(&gpc1->lock, flags); |
5ec3289b DW |
537 | } |
538 | ||
539 | void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) | |
540 | { | |
541 | struct kvm_vcpu_xen *vx = &v->arch.xen; | |
542 | u64 now = get_kvmclock_ns(v->kvm); | |
543 | u64 delta_ns = now - vx->runstate_entry_time; | |
544 | u64 run_delay = current->sched_info.run_delay; | |
545 | ||
546 | if (unlikely(!vx->runstate_entry_time)) | |
547 | vx->current_runstate = RUNSTATE_offline; | |
548 | ||
549 | /* | |
550 | * Time waiting for the scheduler isn't "stolen" if the | |
551 | * vCPU wasn't running anyway. | |
552 | */ | |
553 | if (vx->current_runstate == RUNSTATE_running) { | |
554 | u64 steal_ns = run_delay - vx->last_steal; | |
555 | ||
556 | delta_ns -= steal_ns; | |
557 | ||
558 | vx->runstate_times[RUNSTATE_runnable] += steal_ns; | |
559 | } | |
560 | vx->last_steal = run_delay; | |
561 | ||
562 | vx->runstate_times[vx->current_runstate] += delta_ns; | |
563 | vx->current_runstate = state; | |
564 | vx->runstate_entry_time = now; | |
565 | ||
566 | if (vx->runstate_cache.active) | |
567 | kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable); | |
30b5c851 DW |
568 | } |
569 | ||
8e62bf2b | 570 | void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) |
fde0451b DW |
571 | { |
572 | struct kvm_lapic_irq irq = { }; | |
fde0451b DW |
573 | |
574 | irq.dest_id = v->vcpu_id; | |
575 | irq.vector = v->arch.xen.upcall_vector; | |
576 | irq.dest_mode = APIC_DEST_PHYSICAL; | |
577 | irq.shorthand = APIC_DEST_NOSHORT; | |
578 | irq.delivery_mode = APIC_DM_FIXED; | |
579 | irq.level = 1; | |
580 | ||
66e3cf72 | 581 | kvm_irq_delivery_to_apic(v->kvm, NULL, &irq, NULL); |
fde0451b DW |
582 | } |
583 | ||
7caf9571 DW |
584 | /* |
585 | * On event channel delivery, the vcpu_info may not have been accessible. | |
586 | * In that case, there are bits in vcpu->arch.xen.evtchn_pending_sel which | |
587 | * need to be marked into the vcpu_info (and evtchn_upcall_pending set). | |
588 | * Do so now that we can sleep in the context of the vCPU to bring the | |
589 | * page in, and refresh the pfn cache for it. | |
590 | */ | |
591 | void kvm_xen_inject_pending_events(struct kvm_vcpu *v) | |
40da8ccd | 592 | { |
14243b38 | 593 | unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel); |
7caf9571 DW |
594 | struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache; |
595 | unsigned long flags; | |
596 | ||
597 | if (!evtchn_pending_sel) | |
598 | return; | |
599 | ||
600 | /* | |
601 | * Yes, this is an open-coded loop. But that's just what put_user() | |
602 | * does anyway. Page it in and retry the instruction. We're just a | |
603 | * little more honest about it. | |
604 | */ | |
605 | read_lock_irqsave(&gpc->lock, flags); | |
58f5ee5f | 606 | while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { |
7caf9571 DW |
607 | read_unlock_irqrestore(&gpc->lock, flags); |
608 | ||
58f5ee5f | 609 | if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) |
7caf9571 DW |
610 | return; |
611 | ||
612 | read_lock_irqsave(&gpc->lock, flags); | |
613 | } | |
614 | ||
615 | /* Now gpc->khva is a valid kernel address for the vcpu_info */ | |
616 | if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) { | |
617 | struct vcpu_info *vi = gpc->khva; | |
618 | ||
619 | asm volatile(LOCK_PREFIX "orq %0, %1\n" | |
620 | "notq %0\n" | |
621 | LOCK_PREFIX "andq %0, %2\n" | |
622 | : "=r" (evtchn_pending_sel), | |
623 | "+m" (vi->evtchn_pending_sel), | |
624 | "+m" (v->arch.xen.evtchn_pending_sel) | |
625 | : "0" (evtchn_pending_sel)); | |
626 | WRITE_ONCE(vi->evtchn_upcall_pending, 1); | |
627 | } else { | |
628 | u32 evtchn_pending_sel32 = evtchn_pending_sel; | |
629 | struct compat_vcpu_info *vi = gpc->khva; | |
630 | ||
631 | asm volatile(LOCK_PREFIX "orl %0, %1\n" | |
632 | "notl %0\n" | |
633 | LOCK_PREFIX "andl %0, %2\n" | |
634 | : "=r" (evtchn_pending_sel32), | |
635 | "+m" (vi->evtchn_pending_sel), | |
636 | "+m" (v->arch.xen.evtchn_pending_sel) | |
637 | : "0" (evtchn_pending_sel32)); | |
638 | WRITE_ONCE(vi->evtchn_upcall_pending, 1); | |
639 | } | |
4438355e | 640 | |
78b74638 | 641 | kvm_gpc_mark_dirty_in_slot(gpc); |
7caf9571 DW |
642 | read_unlock_irqrestore(&gpc->lock, flags); |
643 | ||
fde0451b DW |
644 | /* For the per-vCPU lapic vector, deliver it as MSI. */ |
645 | if (v->arch.xen.upcall_vector) | |
646 | kvm_xen_inject_vcpu_vector(v); | |
7caf9571 DW |
647 | } |
648 | ||
649 | int __kvm_xen_has_interrupt(struct kvm_vcpu *v) | |
650 | { | |
651 | struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache; | |
652 | unsigned long flags; | |
40da8ccd DW |
653 | u8 rc = 0; |
654 | ||
655 | /* | |
656 | * If the global upcall vector (HVMIRQ_callback_vector) is set and | |
657 | * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending. | |
658 | */ | |
40da8ccd DW |
659 | |
660 | /* No need for compat handling here */ | |
661 | BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) != | |
662 | offsetof(struct compat_vcpu_info, evtchn_upcall_pending)); | |
663 | BUILD_BUG_ON(sizeof(rc) != | |
6a834754 | 664 | sizeof_field(struct vcpu_info, evtchn_upcall_pending)); |
40da8ccd | 665 | BUILD_BUG_ON(sizeof(rc) != |
6a834754 | 666 | sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending)); |
40da8ccd | 667 | |
7caf9571 | 668 | read_lock_irqsave(&gpc->lock, flags); |
58f5ee5f | 669 | while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { |
7caf9571 | 670 | read_unlock_irqrestore(&gpc->lock, flags); |
0985dba8 | 671 | |
7caf9571 DW |
672 | /* |
673 | * This function gets called from kvm_vcpu_block() after setting the | |
674 | * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately | |
675 | * from a HLT. So we really mustn't sleep. If the page ended up absent | |
676 | * at that point, just return 1 in order to trigger an immediate wake, | |
677 | * and we'll end up getting called again from a context where we *can* | |
678 | * fault in the page and wait for it. | |
679 | */ | |
680 | if (in_atomic() || !task_is_running(current)) | |
681 | return 1; | |
0985dba8 | 682 | |
58f5ee5f | 683 | if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) { |
14243b38 DW |
684 | /* |
685 | * If this failed, userspace has screwed up the | |
686 | * vcpu_info mapping. No interrupts for you. | |
687 | */ | |
688 | return 0; | |
689 | } | |
7caf9571 | 690 | read_lock_irqsave(&gpc->lock, flags); |
14243b38 DW |
691 | } |
692 | ||
7caf9571 DW |
693 | rc = ((struct vcpu_info *)gpc->khva)->evtchn_upcall_pending; |
694 | read_unlock_irqrestore(&gpc->lock, flags); | |
40da8ccd DW |
695 | return rc; |
696 | } | |
697 | ||
a76b9641 JM |
698 | int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) |
699 | { | |
700 | int r = -ENOENT; | |
701 | ||
13ffb97a | 702 | |
a76b9641 | 703 | switch (data->type) { |
a3833b81 | 704 | case KVM_XEN_ATTR_TYPE_LONG_MODE: |
13ffb97a JM |
705 | if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) { |
706 | r = -EINVAL; | |
707 | } else { | |
310bc395 | 708 | mutex_lock(&kvm->arch.xen.xen_lock); |
13ffb97a | 709 | kvm->arch.xen.long_mode = !!data->u.long_mode; |
18b99e4d PD |
710 | |
711 | /* | |
712 | * Re-initialize shared_info to put the wallclock in the | |
713 | * correct place. Whilst it's not necessary to do this | |
714 | * unless the mode is actually changed, it does no harm | |
715 | * to make the call anyway. | |
716 | */ | |
717 | r = kvm->arch.xen.shinfo_cache.active ? | |
718 | kvm_xen_shared_info_init(kvm) : 0; | |
310bc395 | 719 | mutex_unlock(&kvm->arch.xen.xen_lock); |
13ffb97a JM |
720 | } |
721 | break; | |
a3833b81 | 722 | |
b9220d32 PD |
723 | case KVM_XEN_ATTR_TYPE_SHARED_INFO: |
724 | case KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA: { | |
c01c55a3 PD |
725 | int idx; |
726 | ||
310bc395 | 727 | mutex_lock(&kvm->arch.xen.xen_lock); |
c01c55a3 PD |
728 | |
729 | idx = srcu_read_lock(&kvm->srcu); | |
730 | ||
b9220d32 PD |
731 | if (data->type == KVM_XEN_ATTR_TYPE_SHARED_INFO) { |
732 | gfn_t gfn = data->u.shared_info.gfn; | |
733 | ||
734 | if (gfn == KVM_XEN_INVALID_GFN) { | |
735 | kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache); | |
736 | r = 0; | |
737 | } else { | |
738 | r = kvm_gpc_activate(&kvm->arch.xen.shinfo_cache, | |
739 | gfn_to_gpa(gfn), PAGE_SIZE); | |
740 | } | |
c01c55a3 | 741 | } else { |
b9220d32 PD |
742 | void __user * hva = u64_to_user_ptr(data->u.shared_info.hva); |
743 | ||
ebbdf37c | 744 | if (!PAGE_ALIGNED(hva)) { |
b9220d32 PD |
745 | r = -EINVAL; |
746 | } else if (!hva) { | |
747 | kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache); | |
748 | r = 0; | |
749 | } else { | |
750 | r = kvm_gpc_activate_hva(&kvm->arch.xen.shinfo_cache, | |
751 | (unsigned long)hva, PAGE_SIZE); | |
752 | } | |
c01c55a3 PD |
753 | } |
754 | ||
755 | srcu_read_unlock(&kvm->srcu, idx); | |
756 | ||
757 | if (!r && kvm->arch.xen.shinfo_cache.active) | |
758 | r = kvm_xen_shared_info_init(kvm); | |
759 | ||
310bc395 | 760 | mutex_unlock(&kvm->arch.xen.xen_lock); |
a3833b81 | 761 | break; |
c01c55a3 | 762 | } |
40da8ccd | 763 | case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR: |
0c165b3c | 764 | if (data->u.vector && data->u.vector < 0x10) |
40da8ccd DW |
765 | r = -EINVAL; |
766 | else { | |
310bc395 | 767 | mutex_lock(&kvm->arch.xen.xen_lock); |
40da8ccd | 768 | kvm->arch.xen.upcall_vector = data->u.vector; |
310bc395 | 769 | mutex_unlock(&kvm->arch.xen.xen_lock); |
40da8ccd DW |
770 | r = 0; |
771 | } | |
772 | break; | |
773 | ||
2fd6df2f JM |
774 | case KVM_XEN_ATTR_TYPE_EVTCHN: |
775 | r = kvm_xen_setattr_evtchn(kvm, data); | |
776 | break; | |
777 | ||
28d1629f | 778 | case KVM_XEN_ATTR_TYPE_XEN_VERSION: |
310bc395 | 779 | mutex_lock(&kvm->arch.xen.xen_lock); |
28d1629f | 780 | kvm->arch.xen.xen_version = data->u.xen_version; |
310bc395 | 781 | mutex_unlock(&kvm->arch.xen.xen_lock); |
28d1629f DW |
782 | r = 0; |
783 | break; | |
784 | ||
d8ba8ba4 DW |
785 | case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG: |
786 | if (!sched_info_on()) { | |
787 | r = -EOPNOTSUPP; | |
788 | break; | |
789 | } | |
310bc395 | 790 | mutex_lock(&kvm->arch.xen.xen_lock); |
d8ba8ba4 | 791 | kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag; |
310bc395 | 792 | mutex_unlock(&kvm->arch.xen.xen_lock); |
d8ba8ba4 DW |
793 | r = 0; |
794 | break; | |
795 | ||
a76b9641 JM |
796 | default: |
797 | break; | |
798 | } | |
799 | ||
a76b9641 JM |
800 | return r; |
801 | } | |
802 | ||
803 | int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) | |
804 | { | |
805 | int r = -ENOENT; | |
806 | ||
310bc395 | 807 | mutex_lock(&kvm->arch.xen.xen_lock); |
a76b9641 JM |
808 | |
809 | switch (data->type) { | |
a3833b81 DW |
810 | case KVM_XEN_ATTR_TYPE_LONG_MODE: |
811 | data->u.long_mode = kvm->arch.xen.long_mode; | |
812 | r = 0; | |
813 | break; | |
13ffb97a JM |
814 | |
815 | case KVM_XEN_ATTR_TYPE_SHARED_INFO: | |
b9220d32 | 816 | if (kvm_gpc_is_gpa_active(&kvm->arch.xen.shinfo_cache)) |
1cfc9c4b DW |
817 | data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa); |
818 | else | |
b0305c1e | 819 | data->u.shared_info.gfn = KVM_XEN_INVALID_GFN; |
0c165b3c | 820 | r = 0; |
13ffb97a JM |
821 | break; |
822 | ||
b9220d32 PD |
823 | case KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA: |
824 | if (kvm_gpc_is_hva_active(&kvm->arch.xen.shinfo_cache)) | |
825 | data->u.shared_info.hva = kvm->arch.xen.shinfo_cache.uhva; | |
826 | else | |
827 | data->u.shared_info.hva = 0; | |
828 | r = 0; | |
829 | break; | |
830 | ||
40da8ccd DW |
831 | case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR: |
832 | data->u.vector = kvm->arch.xen.upcall_vector; | |
833 | r = 0; | |
834 | break; | |
835 | ||
28d1629f DW |
836 | case KVM_XEN_ATTR_TYPE_XEN_VERSION: |
837 | data->u.xen_version = kvm->arch.xen.xen_version; | |
838 | r = 0; | |
839 | break; | |
840 | ||
d8ba8ba4 DW |
841 | case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG: |
842 | if (!sched_info_on()) { | |
843 | r = -EOPNOTSUPP; | |
844 | break; | |
845 | } | |
846 | data->u.runstate_update_flag = kvm->arch.xen.runstate_update_flag; | |
847 | r = 0; | |
848 | break; | |
849 | ||
a76b9641 JM |
850 | default: |
851 | break; | |
852 | } | |
853 | ||
310bc395 | 854 | mutex_unlock(&kvm->arch.xen.xen_lock); |
a76b9641 JM |
855 | return r; |
856 | } | |
857 | ||
3e324615 DW |
858 | int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) |
859 | { | |
73e69a86 | 860 | int idx, r = -ENOENT; |
3e324615 | 861 | |
310bc395 | 862 | mutex_lock(&vcpu->kvm->arch.xen.xen_lock); |
73e69a86 | 863 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
3e324615 DW |
864 | |
865 | switch (data->type) { | |
73e69a86 | 866 | case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO: |
3991f358 | 867 | case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA: |
73e69a86 JM |
868 | /* No compat necessary here. */ |
869 | BUILD_BUG_ON(sizeof(struct vcpu_info) != | |
870 | sizeof(struct compat_vcpu_info)); | |
7d7c5f76 DW |
871 | BUILD_BUG_ON(offsetof(struct vcpu_info, time) != |
872 | offsetof(struct compat_vcpu_info, time)); | |
73e69a86 | 873 | |
3991f358 PD |
874 | if (data->type == KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO) { |
875 | if (data->u.gpa == KVM_XEN_INVALID_GPA) { | |
876 | kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); | |
877 | r = 0; | |
878 | break; | |
879 | } | |
880 | ||
881 | r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache, | |
882 | data->u.gpa, sizeof(struct vcpu_info)); | |
883 | } else { | |
884 | if (data->u.hva == 0) { | |
885 | kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); | |
886 | r = 0; | |
887 | break; | |
888 | } | |
889 | ||
890 | r = kvm_gpc_activate_hva(&vcpu->arch.xen.vcpu_info_cache, | |
891 | data->u.hva, sizeof(struct vcpu_info)); | |
0c165b3c DW |
892 | } |
893 | ||
7caf9571 | 894 | if (!r) |
aa096aa0 | 895 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
7caf9571 | 896 | |
73e69a86 JM |
897 | break; |
898 | ||
f2340cd9 | 899 | case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO: |
b0305c1e | 900 | if (data->u.gpa == KVM_XEN_INVALID_GPA) { |
8c82a0b3 | 901 | kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); |
7d7c5f76 | 902 | r = 0; |
0c165b3c DW |
903 | break; |
904 | } | |
905 | ||
8c82a0b3 ML |
906 | r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_time_info_cache, |
907 | data->u.gpa, | |
52491a38 | 908 | sizeof(struct pvclock_vcpu_time_info)); |
69d413cf | 909 | if (!r) |
f2340cd9 | 910 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
f2340cd9 JM |
911 | break; |
912 | ||
5ec3289b DW |
913 | case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: { |
914 | size_t sz, sz1, sz2; | |
915 | ||
30b5c851 DW |
916 | if (!sched_info_on()) { |
917 | r = -EOPNOTSUPP; | |
918 | break; | |
919 | } | |
b0305c1e | 920 | if (data->u.gpa == KVM_XEN_INVALID_GPA) { |
5ec3289b DW |
921 | r = 0; |
922 | deactivate_out: | |
8c82a0b3 ML |
923 | kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache); |
924 | kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); | |
30b5c851 DW |
925 | break; |
926 | } | |
927 | ||
5ec3289b DW |
928 | /* |
929 | * If the guest switches to 64-bit mode after setting the runstate | |
930 | * address, that's actually OK. kvm_xen_update_runstate_guest() | |
931 | * will cope. | |
932 | */ | |
933 | if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode) | |
934 | sz = sizeof(struct vcpu_runstate_info); | |
935 | else | |
936 | sz = sizeof(struct compat_vcpu_runstate_info); | |
937 | ||
938 | /* How much fits in the (first) page? */ | |
939 | sz1 = PAGE_SIZE - (data->u.gpa & ~PAGE_MASK); | |
8c82a0b3 ML |
940 | r = kvm_gpc_activate(&vcpu->arch.xen.runstate_cache, |
941 | data->u.gpa, sz1); | |
5ec3289b DW |
942 | if (r) |
943 | goto deactivate_out; | |
944 | ||
945 | /* Either map the second page, or deactivate the second GPC */ | |
946 | if (sz1 >= sz) { | |
8c82a0b3 | 947 | kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); |
5ec3289b DW |
948 | } else { |
949 | sz2 = sz - sz1; | |
950 | BUG_ON((data->u.gpa + sz1) & ~PAGE_MASK); | |
8c82a0b3 | 951 | r = kvm_gpc_activate(&vcpu->arch.xen.runstate2_cache, |
5ec3289b DW |
952 | data->u.gpa + sz1, sz2); |
953 | if (r) | |
954 | goto deactivate_out; | |
955 | } | |
30b5c851 | 956 | |
5ec3289b DW |
957 | kvm_xen_update_runstate_guest(vcpu, false); |
958 | break; | |
959 | } | |
30b5c851 DW |
960 | case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: |
961 | if (!sched_info_on()) { | |
962 | r = -EOPNOTSUPP; | |
963 | break; | |
964 | } | |
965 | if (data->u.runstate.state > RUNSTATE_offline) { | |
966 | r = -EINVAL; | |
967 | break; | |
968 | } | |
969 | ||
970 | kvm_xen_update_runstate(vcpu, data->u.runstate.state); | |
971 | r = 0; | |
972 | break; | |
973 | ||
974 | case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA: | |
975 | if (!sched_info_on()) { | |
976 | r = -EOPNOTSUPP; | |
977 | break; | |
978 | } | |
979 | if (data->u.runstate.state > RUNSTATE_offline) { | |
980 | r = -EINVAL; | |
981 | break; | |
982 | } | |
983 | if (data->u.runstate.state_entry_time != | |
984 | (data->u.runstate.time_running + | |
985 | data->u.runstate.time_runnable + | |
986 | data->u.runstate.time_blocked + | |
987 | data->u.runstate.time_offline)) { | |
988 | r = -EINVAL; | |
989 | break; | |
990 | } | |
991 | if (get_kvmclock_ns(vcpu->kvm) < | |
992 | data->u.runstate.state_entry_time) { | |
993 | r = -EINVAL; | |
994 | break; | |
995 | } | |
996 | ||
997 | vcpu->arch.xen.current_runstate = data->u.runstate.state; | |
998 | vcpu->arch.xen.runstate_entry_time = | |
999 | data->u.runstate.state_entry_time; | |
1000 | vcpu->arch.xen.runstate_times[RUNSTATE_running] = | |
1001 | data->u.runstate.time_running; | |
1002 | vcpu->arch.xen.runstate_times[RUNSTATE_runnable] = | |
1003 | data->u.runstate.time_runnable; | |
1004 | vcpu->arch.xen.runstate_times[RUNSTATE_blocked] = | |
1005 | data->u.runstate.time_blocked; | |
1006 | vcpu->arch.xen.runstate_times[RUNSTATE_offline] = | |
1007 | data->u.runstate.time_offline; | |
1008 | vcpu->arch.xen.last_steal = current->sched_info.run_delay; | |
1009 | r = 0; | |
1010 | break; | |
1011 | ||
1012 | case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST: | |
1013 | if (!sched_info_on()) { | |
1014 | r = -EOPNOTSUPP; | |
1015 | break; | |
1016 | } | |
1017 | if (data->u.runstate.state > RUNSTATE_offline && | |
1018 | data->u.runstate.state != (u64)-1) { | |
1019 | r = -EINVAL; | |
1020 | break; | |
1021 | } | |
1022 | /* The adjustment must add up */ | |
1023 | if (data->u.runstate.state_entry_time != | |
1024 | (data->u.runstate.time_running + | |
1025 | data->u.runstate.time_runnable + | |
1026 | data->u.runstate.time_blocked + | |
1027 | data->u.runstate.time_offline)) { | |
1028 | r = -EINVAL; | |
1029 | break; | |
1030 | } | |
1031 | ||
1032 | if (get_kvmclock_ns(vcpu->kvm) < | |
1033 | (vcpu->arch.xen.runstate_entry_time + | |
1034 | data->u.runstate.state_entry_time)) { | |
1035 | r = -EINVAL; | |
1036 | break; | |
1037 | } | |
1038 | ||
1039 | vcpu->arch.xen.runstate_entry_time += | |
1040 | data->u.runstate.state_entry_time; | |
1041 | vcpu->arch.xen.runstate_times[RUNSTATE_running] += | |
1042 | data->u.runstate.time_running; | |
1043 | vcpu->arch.xen.runstate_times[RUNSTATE_runnable] += | |
1044 | data->u.runstate.time_runnable; | |
1045 | vcpu->arch.xen.runstate_times[RUNSTATE_blocked] += | |
1046 | data->u.runstate.time_blocked; | |
1047 | vcpu->arch.xen.runstate_times[RUNSTATE_offline] += | |
1048 | data->u.runstate.time_offline; | |
1049 | ||
1050 | if (data->u.runstate.state <= RUNSTATE_offline) | |
1051 | kvm_xen_update_runstate(vcpu, data->u.runstate.state); | |
8acc3518 DW |
1052 | else if (vcpu->arch.xen.runstate_cache.active) |
1053 | kvm_xen_update_runstate_guest(vcpu, false); | |
30b5c851 DW |
1054 | r = 0; |
1055 | break; | |
1056 | ||
942c2490 DW |
1057 | case KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID: |
1058 | if (data->u.vcpu_id >= KVM_MAX_VCPUS) | |
1059 | r = -EINVAL; | |
1060 | else { | |
1061 | vcpu->arch.xen.vcpu_id = data->u.vcpu_id; | |
1062 | r = 0; | |
1063 | } | |
1064 | break; | |
1065 | ||
53639526 | 1066 | case KVM_XEN_VCPU_ATTR_TYPE_TIMER: |
c0368991 CD |
1067 | if (data->u.timer.port && |
1068 | data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) { | |
1069 | r = -EINVAL; | |
1070 | break; | |
53639526 JM |
1071 | } |
1072 | ||
c0368991 CD |
1073 | if (!vcpu->arch.xen.timer.function) |
1074 | kvm_xen_init_timer(vcpu); | |
1075 | ||
1076 | /* Stop the timer (if it's running) before changing the vector */ | |
1077 | kvm_xen_stop_timer(vcpu); | |
1078 | vcpu->arch.xen.timer_virq = data->u.timer.port; | |
1079 | ||
1080 | /* Start the timer if the new value has a valid vector+expiry. */ | |
1081 | if (data->u.timer.port && data->u.timer.expires_ns) | |
451a7078 | 1082 | kvm_xen_start_timer(vcpu, data->u.timer.expires_ns, false); |
c0368991 | 1083 | |
53639526 JM |
1084 | r = 0; |
1085 | break; | |
1086 | ||
fde0451b DW |
1087 | case KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR: |
1088 | if (data->u.vector && data->u.vector < 0x10) | |
1089 | r = -EINVAL; | |
1090 | else { | |
1091 | vcpu->arch.xen.upcall_vector = data->u.vector; | |
1092 | r = 0; | |
1093 | } | |
1094 | break; | |
1095 | ||
3e324615 DW |
1096 | default: |
1097 | break; | |
1098 | } | |
1099 | ||
73e69a86 | 1100 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
310bc395 | 1101 | mutex_unlock(&vcpu->kvm->arch.xen.xen_lock); |
3e324615 DW |
1102 | return r; |
1103 | } | |
1104 | ||
1105 | int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) | |
1106 | { | |
1107 | int r = -ENOENT; | |
1108 | ||
310bc395 | 1109 | mutex_lock(&vcpu->kvm->arch.xen.xen_lock); |
3e324615 DW |
1110 | |
1111 | switch (data->type) { | |
73e69a86 | 1112 | case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO: |
3991f358 | 1113 | if (kvm_gpc_is_gpa_active(&vcpu->arch.xen.vcpu_info_cache)) |
73e69a86 | 1114 | data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa; |
0c165b3c | 1115 | else |
b0305c1e | 1116 | data->u.gpa = KVM_XEN_INVALID_GPA; |
0c165b3c | 1117 | r = 0; |
73e69a86 JM |
1118 | break; |
1119 | ||
3991f358 PD |
1120 | case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA: |
1121 | if (kvm_gpc_is_hva_active(&vcpu->arch.xen.vcpu_info_cache)) | |
1122 | data->u.hva = vcpu->arch.xen.vcpu_info_cache.uhva; | |
1123 | else | |
1124 | data->u.hva = 0; | |
1125 | r = 0; | |
1126 | break; | |
1127 | ||
f2340cd9 | 1128 | case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO: |
69d413cf | 1129 | if (vcpu->arch.xen.vcpu_time_info_cache.active) |
f2340cd9 | 1130 | data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa; |
0c165b3c | 1131 | else |
b0305c1e | 1132 | data->u.gpa = KVM_XEN_INVALID_GPA; |
0c165b3c | 1133 | r = 0; |
f2340cd9 JM |
1134 | break; |
1135 | ||
30b5c851 DW |
1136 | case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: |
1137 | if (!sched_info_on()) { | |
1138 | r = -EOPNOTSUPP; | |
1139 | break; | |
1140 | } | |
a795cd43 | 1141 | if (vcpu->arch.xen.runstate_cache.active) { |
30b5c851 DW |
1142 | data->u.gpa = vcpu->arch.xen.runstate_cache.gpa; |
1143 | r = 0; | |
1144 | } | |
1145 | break; | |
1146 | ||
1147 | case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: | |
1148 | if (!sched_info_on()) { | |
1149 | r = -EOPNOTSUPP; | |
1150 | break; | |
1151 | } | |
1152 | data->u.runstate.state = vcpu->arch.xen.current_runstate; | |
1153 | r = 0; | |
1154 | break; | |
1155 | ||
1156 | case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA: | |
1157 | if (!sched_info_on()) { | |
1158 | r = -EOPNOTSUPP; | |
1159 | break; | |
1160 | } | |
1161 | data->u.runstate.state = vcpu->arch.xen.current_runstate; | |
1162 | data->u.runstate.state_entry_time = | |
1163 | vcpu->arch.xen.runstate_entry_time; | |
1164 | data->u.runstate.time_running = | |
1165 | vcpu->arch.xen.runstate_times[RUNSTATE_running]; | |
1166 | data->u.runstate.time_runnable = | |
1167 | vcpu->arch.xen.runstate_times[RUNSTATE_runnable]; | |
1168 | data->u.runstate.time_blocked = | |
1169 | vcpu->arch.xen.runstate_times[RUNSTATE_blocked]; | |
1170 | data->u.runstate.time_offline = | |
1171 | vcpu->arch.xen.runstate_times[RUNSTATE_offline]; | |
1172 | r = 0; | |
1173 | break; | |
1174 | ||
1175 | case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST: | |
1176 | r = -EINVAL; | |
1177 | break; | |
1178 | ||
942c2490 DW |
1179 | case KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID: |
1180 | data->u.vcpu_id = vcpu->arch.xen.vcpu_id; | |
1181 | r = 0; | |
1182 | break; | |
1183 | ||
53639526 | 1184 | case KVM_XEN_VCPU_ATTR_TYPE_TIMER: |
77c9b9de DW |
1185 | /* |
1186 | * Ensure a consistent snapshot of state is captured, with a | |
1187 | * timer either being pending, or the event channel delivered | |
1188 | * to the corresponding bit in the shared_info. Not still | |
1189 | * lurking in the timer_pending flag for deferred delivery. | |
1190 | * Purely as an optimisation, if the timer_expires field is | |
1191 | * zero, that means the timer isn't active (or even in the | |
1192 | * timer_pending flag) and there is no need to cancel it. | |
1193 | */ | |
1194 | if (vcpu->arch.xen.timer_expires) { | |
1195 | hrtimer_cancel(&vcpu->arch.xen.timer); | |
1196 | kvm_xen_inject_timer_irqs(vcpu); | |
1197 | } | |
1198 | ||
53639526 JM |
1199 | data->u.timer.port = vcpu->arch.xen.timer_virq; |
1200 | data->u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; | |
1201 | data->u.timer.expires_ns = vcpu->arch.xen.timer_expires; | |
77c9b9de DW |
1202 | |
1203 | /* | |
1204 | * The hrtimer may trigger and raise the IRQ immediately, | |
1205 | * while the returned state causes it to be set up and | |
1206 | * raised again on the destination system after migration. | |
1207 | * That's fine, as the guest won't even have had a chance | |
1208 | * to run and handle the interrupt. Asserting an already | |
1209 | * pending event channel is idempotent. | |
1210 | */ | |
1211 | if (vcpu->arch.xen.timer_expires) | |
1212 | hrtimer_start_expires(&vcpu->arch.xen.timer, | |
1213 | HRTIMER_MODE_ABS_HARD); | |
1214 | ||
53639526 JM |
1215 | r = 0; |
1216 | break; | |
1217 | ||
fde0451b DW |
1218 | case KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR: |
1219 | data->u.vector = vcpu->arch.xen.upcall_vector; | |
1220 | r = 0; | |
1221 | break; | |
1222 | ||
3e324615 DW |
1223 | default: |
1224 | break; | |
1225 | } | |
1226 | ||
310bc395 | 1227 | mutex_unlock(&vcpu->kvm->arch.xen.xen_lock); |
3e324615 DW |
1228 | return r; |
1229 | } | |
1230 | ||
23200b7a JM |
1231 | int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) |
1232 | { | |
1233 | struct kvm *kvm = vcpu->kvm; | |
1234 | u32 page_num = data & ~PAGE_MASK; | |
1235 | u64 page_addr = data & PAGE_MASK; | |
a3833b81 | 1236 | bool lm = is_long_mode(vcpu); |
18b99e4d PD |
1237 | int r = 0; |
1238 | ||
1239 | mutex_lock(&kvm->arch.xen.xen_lock); | |
1240 | if (kvm->arch.xen.long_mode != lm) { | |
1241 | kvm->arch.xen.long_mode = lm; | |
1242 | ||
1243 | /* | |
1244 | * Re-initialize shared_info to put the wallclock in the | |
1245 | * correct place. | |
1246 | */ | |
1247 | if (kvm->arch.xen.shinfo_cache.active && | |
1248 | kvm_xen_shared_info_init(kvm)) | |
1249 | r = 1; | |
1250 | } | |
1251 | mutex_unlock(&kvm->arch.xen.xen_lock); | |
a3833b81 | 1252 | |
18b99e4d PD |
1253 | if (r) |
1254 | return r; | |
23200b7a JM |
1255 | |
1256 | /* | |
1257 | * If Xen hypercall intercept is enabled, fill the hypercall | |
1258 | * page with VMCALL/VMMCALL instructions since that's what | |
1259 | * we catch. Else the VMM has provided the hypercall pages | |
1260 | * with instructions of its own choosing, so use those. | |
1261 | */ | |
1262 | if (kvm_xen_hypercall_enabled(kvm)) { | |
1263 | u8 instructions[32]; | |
1264 | int i; | |
1265 | ||
1266 | if (page_num) | |
1267 | return 1; | |
1268 | ||
1269 | /* mov imm32, %eax */ | |
1270 | instructions[0] = 0xb8; | |
1271 | ||
1272 | /* vmcall / vmmcall */ | |
89604647 | 1273 | kvm_x86_call(patch_hypercall)(vcpu, instructions + 5); |
23200b7a JM |
1274 | |
1275 | /* ret */ | |
1276 | instructions[8] = 0xc3; | |
1277 | ||
1278 | /* int3 to pad */ | |
1279 | memset(instructions + 9, 0xcc, sizeof(instructions) - 9); | |
1280 | ||
1281 | for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) { | |
1282 | *(u32 *)&instructions[1] = i; | |
1283 | if (kvm_vcpu_write_guest(vcpu, | |
1284 | page_addr + (i * sizeof(instructions)), | |
1285 | instructions, sizeof(instructions))) | |
1286 | return 1; | |
1287 | } | |
1288 | } else { | |
448841f0 SC |
1289 | /* |
1290 | * Note, truncation is a non-issue as 'lm' is guaranteed to be | |
1291 | * false for a 32-bit kernel, i.e. when hva_t is only 4 bytes. | |
1292 | */ | |
1293 | hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64 | |
1294 | : kvm->arch.xen_hvm_config.blob_addr_32; | |
23200b7a JM |
1295 | u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 |
1296 | : kvm->arch.xen_hvm_config.blob_size_32; | |
1297 | u8 *page; | |
385407a6 | 1298 | int ret; |
23200b7a JM |
1299 | |
1300 | if (page_num >= blob_size) | |
1301 | return 1; | |
1302 | ||
1303 | blob_addr += page_num * PAGE_SIZE; | |
1304 | ||
1305 | page = memdup_user((u8 __user *)blob_addr, PAGE_SIZE); | |
1306 | if (IS_ERR(page)) | |
1307 | return PTR_ERR(page); | |
1308 | ||
385407a6 ML |
1309 | ret = kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE); |
1310 | kfree(page); | |
1311 | if (ret) | |
23200b7a | 1312 | return 1; |
23200b7a JM |
1313 | } |
1314 | return 0; | |
1315 | } | |
1316 | ||
78e9878c DW |
1317 | int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) |
1318 | { | |
661a20fa DW |
1319 | /* Only some feature flags need to be *enabled* by userspace */ |
1320 | u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | | |
6d722835 PD |
1321 | KVM_XEN_HVM_CONFIG_EVTCHN_SEND | |
1322 | KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; | |
1323 | u32 old_flags; | |
661a20fa DW |
1324 | |
1325 | if (xhc->flags & ~permitted_flags) | |
78e9878c DW |
1326 | return -EINVAL; |
1327 | ||
1328 | /* | |
1329 | * With hypercall interception the kernel generates its own | |
1330 | * hypercall page so it must not be provided. | |
1331 | */ | |
1332 | if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) && | |
1333 | (xhc->blob_addr_32 || xhc->blob_addr_64 || | |
1334 | xhc->blob_size_32 || xhc->blob_size_64)) | |
1335 | return -EINVAL; | |
1336 | ||
310bc395 | 1337 | mutex_lock(&kvm->arch.xen.xen_lock); |
7d6bbebb DW |
1338 | |
1339 | if (xhc->msr && !kvm->arch.xen_hvm_config.msr) | |
1340 | static_branch_inc(&kvm_xen_enabled.key); | |
1341 | else if (!xhc->msr && kvm->arch.xen_hvm_config.msr) | |
1342 | static_branch_slow_dec_deferred(&kvm_xen_enabled); | |
1343 | ||
6d722835 | 1344 | old_flags = kvm->arch.xen_hvm_config.flags; |
78e9878c | 1345 | memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc)); |
7d6bbebb | 1346 | |
310bc395 | 1347 | mutex_unlock(&kvm->arch.xen.xen_lock); |
6d722835 PD |
1348 | |
1349 | if ((old_flags ^ xhc->flags) & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE) | |
1350 | kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); | |
1351 | ||
78e9878c DW |
1352 | return 0; |
1353 | } | |
1354 | ||
23200b7a JM |
1355 | static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) |
1356 | { | |
1357 | kvm_rax_write(vcpu, result); | |
1358 | return kvm_skip_emulated_instruction(vcpu); | |
1359 | } | |
1360 | ||
1361 | static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu) | |
1362 | { | |
1363 | struct kvm_run *run = vcpu->run; | |
1364 | ||
1365 | if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip))) | |
1366 | return 1; | |
1367 | ||
1368 | return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result); | |
1369 | } | |
1370 | ||
4ea9439f DW |
1371 | static inline int max_evtchn_port(struct kvm *kvm) |
1372 | { | |
1373 | if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) | |
1374 | return EVTCHN_2L_NR_CHANNELS; | |
1375 | else | |
1376 | return COMPAT_EVTCHN_2L_NR_CHANNELS; | |
1377 | } | |
1378 | ||
1a65105a BO |
1379 | static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, |
1380 | evtchn_port_t *ports) | |
1381 | { | |
1382 | struct kvm *kvm = vcpu->kvm; | |
1383 | struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; | |
1384 | unsigned long *pending_bits; | |
1385 | unsigned long flags; | |
1386 | bool ret = true; | |
1387 | int idx, i; | |
1388 | ||
1a65105a | 1389 | idx = srcu_read_lock(&kvm->srcu); |
4265df66 | 1390 | read_lock_irqsave(&gpc->lock, flags); |
58f5ee5f | 1391 | if (!kvm_gpc_check(gpc, PAGE_SIZE)) |
1a65105a BO |
1392 | goto out_rcu; |
1393 | ||
1394 | ret = false; | |
1395 | if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { | |
1396 | struct shared_info *shinfo = gpc->khva; | |
1397 | pending_bits = (unsigned long *)&shinfo->evtchn_pending; | |
1398 | } else { | |
1399 | struct compat_shared_info *shinfo = gpc->khva; | |
1400 | pending_bits = (unsigned long *)&shinfo->evtchn_pending; | |
1401 | } | |
1402 | ||
1403 | for (i = 0; i < nr_ports; i++) { | |
1404 | if (test_bit(ports[i], pending_bits)) { | |
1405 | ret = true; | |
1406 | break; | |
1407 | } | |
1408 | } | |
1409 | ||
1410 | out_rcu: | |
1a65105a | 1411 | read_unlock_irqrestore(&gpc->lock, flags); |
4265df66 | 1412 | srcu_read_unlock(&kvm->srcu, idx); |
1a65105a BO |
1413 | |
1414 | return ret; | |
1415 | } | |
1416 | ||
1417 | static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, | |
1418 | u64 param, u64 *r) | |
1419 | { | |
1a65105a BO |
1420 | struct sched_poll sched_poll; |
1421 | evtchn_port_t port, *ports; | |
92c58965 DW |
1422 | struct x86_exception e; |
1423 | int i; | |
1a65105a | 1424 | |
214b0a88 | 1425 | if (!lapic_in_kernel(vcpu) || |
1a65105a BO |
1426 | !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)) |
1427 | return false; | |
1428 | ||
214b0a88 MK |
1429 | if (IS_ENABLED(CONFIG_64BIT) && !longmode) { |
1430 | struct compat_sched_poll sp32; | |
1431 | ||
1432 | /* Sanity check that the compat struct definition is correct */ | |
1433 | BUILD_BUG_ON(sizeof(sp32) != 16); | |
1434 | ||
92c58965 | 1435 | if (kvm_read_guest_virt(vcpu, param, &sp32, sizeof(sp32), &e)) { |
214b0a88 MK |
1436 | *r = -EFAULT; |
1437 | return true; | |
1438 | } | |
1439 | ||
1440 | /* | |
1441 | * This is a 32-bit pointer to an array of evtchn_port_t which | |
1442 | * are uint32_t, so once it's converted no further compat | |
1443 | * handling is needed. | |
1444 | */ | |
1445 | sched_poll.ports = (void *)(unsigned long)(sp32.ports); | |
1446 | sched_poll.nr_ports = sp32.nr_ports; | |
1447 | sched_poll.timeout = sp32.timeout; | |
1448 | } else { | |
92c58965 DW |
1449 | if (kvm_read_guest_virt(vcpu, param, &sched_poll, |
1450 | sizeof(sched_poll), &e)) { | |
214b0a88 MK |
1451 | *r = -EFAULT; |
1452 | return true; | |
1453 | } | |
1454 | } | |
1455 | ||
1a65105a BO |
1456 | if (unlikely(sched_poll.nr_ports > 1)) { |
1457 | /* Xen (unofficially) limits number of pollers to 128 */ | |
1458 | if (sched_poll.nr_ports > 128) { | |
1459 | *r = -EINVAL; | |
1460 | return true; | |
1461 | } | |
1462 | ||
1463 | ports = kmalloc_array(sched_poll.nr_ports, | |
1464 | sizeof(*ports), GFP_KERNEL); | |
1465 | if (!ports) { | |
1466 | *r = -ENOMEM; | |
1467 | return true; | |
1468 | } | |
1469 | } else | |
1470 | ports = &port; | |
1471 | ||
92c58965 DW |
1472 | if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports, |
1473 | sched_poll.nr_ports * sizeof(*ports), &e)) { | |
1474 | *r = -EFAULT; | |
1475 | return true; | |
1476 | } | |
1477 | ||
1a65105a | 1478 | for (i = 0; i < sched_poll.nr_ports; i++) { |
4ea9439f DW |
1479 | if (ports[i] >= max_evtchn_port(vcpu->kvm)) { |
1480 | *r = -EINVAL; | |
1481 | goto out; | |
1482 | } | |
1a65105a BO |
1483 | } |
1484 | ||
1485 | if (sched_poll.nr_ports == 1) | |
1486 | vcpu->arch.xen.poll_evtchn = port; | |
1487 | else | |
1488 | vcpu->arch.xen.poll_evtchn = -1; | |
1489 | ||
79f772b9 | 1490 | set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask); |
1a65105a BO |
1491 | |
1492 | if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) { | |
1493 | vcpu->arch.mp_state = KVM_MP_STATE_HALTED; | |
1494 | ||
1495 | if (sched_poll.timeout) | |
1496 | mod_timer(&vcpu->arch.xen.poll_timer, | |
1497 | jiffies + nsecs_to_jiffies(sched_poll.timeout)); | |
1498 | ||
1499 | kvm_vcpu_halt(vcpu); | |
1500 | ||
1501 | if (sched_poll.timeout) | |
1502 | del_timer(&vcpu->arch.xen.poll_timer); | |
1503 | ||
1504 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | |
1a65105a BO |
1505 | } |
1506 | ||
1507 | vcpu->arch.xen.poll_evtchn = 0; | |
1508 | *r = 0; | |
1509 | out: | |
1510 | /* Really, this is only needed in case of timeout */ | |
79f772b9 | 1511 | clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask); |
1a65105a BO |
1512 | |
1513 | if (unlikely(sched_poll.nr_ports > 1)) | |
1514 | kfree(ports); | |
1515 | return true; | |
1516 | } | |
1517 | ||
1518 | static void cancel_evtchn_poll(struct timer_list *t) | |
1519 | { | |
1520 | struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.xen.poll_timer); | |
1521 | ||
1522 | kvm_make_request(KVM_REQ_UNBLOCK, vcpu); | |
1523 | kvm_vcpu_kick(vcpu); | |
1524 | } | |
1525 | ||
1526 | static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode, | |
1527 | int cmd, u64 param, u64 *r) | |
0ec6c5c5 JM |
1528 | { |
1529 | switch (cmd) { | |
1a65105a BO |
1530 | case SCHEDOP_poll: |
1531 | if (kvm_xen_schedop_poll(vcpu, longmode, param, r)) | |
1532 | return true; | |
1533 | fallthrough; | |
0ec6c5c5 JM |
1534 | case SCHEDOP_yield: |
1535 | kvm_vcpu_on_spin(vcpu, true); | |
1536 | *r = 0; | |
1537 | return true; | |
1538 | default: | |
1539 | break; | |
1540 | } | |
1541 | ||
1542 | return false; | |
1543 | } | |
1544 | ||
53639526 JM |
/*
 * Packed layout of the argument to VCPUOP_set_singleshot_timer as used
 * for 32-bit (compat) callers: 12 bytes with no trailing padding.
 */
struct compat_vcpu_set_singleshot_timer {
	uint64_t timeout_abs_ns;	/* passed to kvm_xen_start_timer() */
	uint32_t flags;
} __attribute__((__packed__));
1549 | ||
1550 | static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd, | |
1551 | int vcpu_id, u64 param, u64 *r) | |
1552 | { | |
1553 | struct vcpu_set_singleshot_timer oneshot; | |
92c58965 | 1554 | struct x86_exception e; |
53639526 JM |
1555 | |
1556 | if (!kvm_xen_timer_enabled(vcpu)) | |
1557 | return false; | |
1558 | ||
1559 | switch (cmd) { | |
1560 | case VCPUOP_set_singleshot_timer: | |
1561 | if (vcpu->arch.xen.vcpu_id != vcpu_id) { | |
1562 | *r = -EINVAL; | |
1563 | return true; | |
1564 | } | |
53639526 JM |
1565 | |
1566 | /* | |
1567 | * The only difference for 32-bit compat is the 4 bytes of | |
1568 | * padding after the interesting part of the structure. So | |
1569 | * for a faithful emulation of Xen we have to *try* to copy | |
1570 | * the padding and return -EFAULT if we can't. Otherwise we | |
1571 | * might as well just have copied the 12-byte 32-bit struct. | |
1572 | */ | |
1573 | BUILD_BUG_ON(offsetof(struct compat_vcpu_set_singleshot_timer, timeout_abs_ns) != | |
1574 | offsetof(struct vcpu_set_singleshot_timer, timeout_abs_ns)); | |
1575 | BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, timeout_abs_ns) != | |
1576 | sizeof_field(struct vcpu_set_singleshot_timer, timeout_abs_ns)); | |
1577 | BUILD_BUG_ON(offsetof(struct compat_vcpu_set_singleshot_timer, flags) != | |
1578 | offsetof(struct vcpu_set_singleshot_timer, flags)); | |
1579 | BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, flags) != | |
1580 | sizeof_field(struct vcpu_set_singleshot_timer, flags)); | |
1581 | ||
92c58965 DW |
1582 | if (kvm_read_guest_virt(vcpu, param, &oneshot, longmode ? sizeof(oneshot) : |
1583 | sizeof(struct compat_vcpu_set_singleshot_timer), &e)) { | |
53639526 JM |
1584 | *r = -EFAULT; |
1585 | return true; | |
1586 | } | |
1587 | ||
451a7078 | 1588 | kvm_xen_start_timer(vcpu, oneshot.timeout_abs_ns, false); |
53639526 JM |
1589 | *r = 0; |
1590 | return true; | |
1591 | ||
1592 | case VCPUOP_stop_singleshot_timer: | |
1593 | if (vcpu->arch.xen.vcpu_id != vcpu_id) { | |
1594 | *r = -EINVAL; | |
1595 | return true; | |
1596 | } | |
1597 | kvm_xen_stop_timer(vcpu); | |
1598 | *r = 0; | |
1599 | return true; | |
1600 | } | |
1601 | ||
1602 | return false; | |
1603 | } | |
1604 | ||
1605 | static bool kvm_xen_hcall_set_timer_op(struct kvm_vcpu *vcpu, uint64_t timeout, | |
1606 | u64 *r) | |
1607 | { | |
1608 | if (!kvm_xen_timer_enabled(vcpu)) | |
1609 | return false; | |
1610 | ||
451a7078 DW |
1611 | if (timeout) |
1612 | kvm_xen_start_timer(vcpu, timeout, true); | |
1613 | else | |
53639526 | 1614 | kvm_xen_stop_timer(vcpu); |
53639526 JM |
1615 | |
1616 | *r = 0; | |
1617 | return true; | |
1618 | } | |
1619 | ||
23200b7a JM |
1620 | int kvm_xen_hypercall(struct kvm_vcpu *vcpu) |
1621 | { | |
1622 | bool longmode; | |
2fd6df2f JM |
1623 | u64 input, params[6], r = -ENOSYS; |
1624 | bool handled = false; | |
c2b8cdfa | 1625 | u8 cpl; |
23200b7a JM |
1626 | |
1627 | input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX); | |
1628 | ||
79033beb JM |
1629 | /* Hyper-V hypercalls get bit 31 set in EAX */ |
1630 | if ((input & 0x80000000) && | |
8f014550 | 1631 | kvm_hv_hypercall_enabled(vcpu)) |
79033beb JM |
1632 | return kvm_hv_hypercall(vcpu); |
1633 | ||
b5aead00 | 1634 | longmode = is_64_bit_hypercall(vcpu); |
23200b7a JM |
1635 | if (!longmode) { |
1636 | params[0] = (u32)kvm_rbx_read(vcpu); | |
1637 | params[1] = (u32)kvm_rcx_read(vcpu); | |
1638 | params[2] = (u32)kvm_rdx_read(vcpu); | |
1639 | params[3] = (u32)kvm_rsi_read(vcpu); | |
1640 | params[4] = (u32)kvm_rdi_read(vcpu); | |
1641 | params[5] = (u32)kvm_rbp_read(vcpu); | |
1642 | } | |
1643 | #ifdef CONFIG_X86_64 | |
1644 | else { | |
1645 | params[0] = (u64)kvm_rdi_read(vcpu); | |
1646 | params[1] = (u64)kvm_rsi_read(vcpu); | |
1647 | params[2] = (u64)kvm_rdx_read(vcpu); | |
1648 | params[3] = (u64)kvm_r10_read(vcpu); | |
1649 | params[4] = (u64)kvm_r8_read(vcpu); | |
1650 | params[5] = (u64)kvm_r9_read(vcpu); | |
1651 | } | |
1652 | #endif | |
89604647 | 1653 | cpl = kvm_x86_call(get_cpl)(vcpu); |
c3f37199 | 1654 | trace_kvm_xen_hypercall(cpl, input, params[0], params[1], params[2], |
23200b7a JM |
1655 | params[3], params[4], params[5]); |
1656 | ||
c2b8cdfa DW |
1657 | /* |
1658 | * Only allow hypercall acceleration for CPL0. The rare hypercalls that | |
1659 | * are permitted in guest userspace can be handled by the VMM. | |
1660 | */ | |
1661 | if (unlikely(cpl > 0)) | |
1662 | goto handle_in_userspace; | |
1663 | ||
2fd6df2f | 1664 | switch (input) { |
28d1629f DW |
1665 | case __HYPERVISOR_xen_version: |
1666 | if (params[0] == XENVER_version && vcpu->kvm->arch.xen.xen_version) { | |
1667 | r = vcpu->kvm->arch.xen.xen_version; | |
1668 | handled = true; | |
1669 | } | |
1670 | break; | |
2fd6df2f JM |
1671 | case __HYPERVISOR_event_channel_op: |
1672 | if (params[0] == EVTCHNOP_send) | |
1673 | handled = kvm_xen_hcall_evtchn_send(vcpu, params[1], &r); | |
1674 | break; | |
0ec6c5c5 | 1675 | case __HYPERVISOR_sched_op: |
1a65105a BO |
1676 | handled = kvm_xen_hcall_sched_op(vcpu, longmode, params[0], |
1677 | params[1], &r); | |
0ec6c5c5 | 1678 | break; |
53639526 JM |
1679 | case __HYPERVISOR_vcpu_op: |
1680 | handled = kvm_xen_hcall_vcpu_op(vcpu, longmode, params[0], params[1], | |
1681 | params[2], &r); | |
1682 | break; | |
1683 | case __HYPERVISOR_set_timer_op: { | |
1684 | u64 timeout = params[0]; | |
1685 | /* In 32-bit mode, the 64-bit timeout is in two 32-bit params. */ | |
1686 | if (!longmode) | |
1687 | timeout |= params[1] << 32; | |
1688 | handled = kvm_xen_hcall_set_timer_op(vcpu, timeout, &r); | |
1689 | break; | |
1690 | } | |
2fd6df2f JM |
1691 | default: |
1692 | break; | |
1693 | } | |
1694 | ||
1695 | if (handled) | |
1696 | return kvm_xen_hypercall_set_result(vcpu, r); | |
1697 | ||
c2b8cdfa | 1698 | handle_in_userspace: |
23200b7a JM |
1699 | vcpu->run->exit_reason = KVM_EXIT_XEN; |
1700 | vcpu->run->xen.type = KVM_EXIT_XEN_HCALL; | |
1701 | vcpu->run->xen.u.hcall.longmode = longmode; | |
c2b8cdfa | 1702 | vcpu->run->xen.u.hcall.cpl = cpl; |
23200b7a JM |
1703 | vcpu->run->xen.u.hcall.input = input; |
1704 | vcpu->run->xen.u.hcall.params[0] = params[0]; | |
1705 | vcpu->run->xen.u.hcall.params[1] = params[1]; | |
1706 | vcpu->run->xen.u.hcall.params[2] = params[2]; | |
1707 | vcpu->run->xen.u.hcall.params[3] = params[3]; | |
1708 | vcpu->run->xen.u.hcall.params[4] = params[4]; | |
1709 | vcpu->run->xen.u.hcall.params[5] = params[5]; | |
1710 | vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu); | |
1711 | vcpu->arch.complete_userspace_io = | |
1712 | kvm_xen_hypercall_complete_userspace; | |
1713 | ||
1714 | return 0; | |
1715 | } | |
14243b38 | 1716 | |
1a65105a BO |
1717 | static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port) |
1718 | { | |
1719 | int poll_evtchn = vcpu->arch.xen.poll_evtchn; | |
1720 | ||
1721 | if ((poll_evtchn == port || poll_evtchn == -1) && | |
79f772b9 | 1722 | test_and_clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask)) { |
1a65105a BO |
1723 | kvm_make_request(KVM_REQ_UNBLOCK, vcpu); |
1724 | kvm_vcpu_kick(vcpu); | |
1725 | } | |
1726 | } | |
1727 | ||
14243b38 | 1728 | /* |
8733068b DW |
1729 | * The return value from this function is propagated to kvm_set_irq() API, |
1730 | * so it returns: | |
14243b38 DW |
1731 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) |
1732 | * = 0 Interrupt was coalesced (previous irq is still pending) | |
1733 | * > 0 Number of CPUs interrupt was delivered to | |
8733068b DW |
1734 | * |
1735 | * It is also called directly from kvm_arch_set_irq_inatomic(), where the | |
1736 | * only check on its return value is a comparison with -EWOULDBLOCK'. | |
14243b38 | 1737 | */ |
8733068b | 1738 | int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) |
14243b38 DW |
1739 | { |
1740 | struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; | |
1741 | struct kvm_vcpu *vcpu; | |
1742 | unsigned long *pending_bits, *mask_bits; | |
1743 | unsigned long flags; | |
1744 | int port_word_bit; | |
1745 | bool kick_vcpu = false; | |
8733068b | 1746 | int vcpu_idx, idx, rc; |
14243b38 | 1747 | |
8733068b DW |
1748 | vcpu_idx = READ_ONCE(xe->vcpu_idx); |
1749 | if (vcpu_idx >= 0) | |
1750 | vcpu = kvm_get_vcpu(kvm, vcpu_idx); | |
1751 | else { | |
1752 | vcpu = kvm_get_vcpu_by_id(kvm, xe->vcpu_id); | |
1753 | if (!vcpu) | |
1754 | return -EINVAL; | |
79f772b9 | 1755 | WRITE_ONCE(xe->vcpu_idx, vcpu->vcpu_idx); |
8733068b | 1756 | } |
14243b38 | 1757 | |
8733068b DW |
1758 | if (xe->port >= max_evtchn_port(kvm)) |
1759 | return -EINVAL; | |
14243b38 DW |
1760 | |
1761 | rc = -EWOULDBLOCK; | |
14243b38 DW |
1762 | |
1763 | idx = srcu_read_lock(&kvm->srcu); | |
7caf9571 DW |
1764 | |
1765 | read_lock_irqsave(&gpc->lock, flags); | |
58f5ee5f | 1766 | if (!kvm_gpc_check(gpc, PAGE_SIZE)) |
14243b38 DW |
1767 | goto out_rcu; |
1768 | ||
1769 | if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { | |
1770 | struct shared_info *shinfo = gpc->khva; | |
1771 | pending_bits = (unsigned long *)&shinfo->evtchn_pending; | |
1772 | mask_bits = (unsigned long *)&shinfo->evtchn_mask; | |
8733068b | 1773 | port_word_bit = xe->port / 64; |
14243b38 DW |
1774 | } else { |
1775 | struct compat_shared_info *shinfo = gpc->khva; | |
1776 | pending_bits = (unsigned long *)&shinfo->evtchn_pending; | |
1777 | mask_bits = (unsigned long *)&shinfo->evtchn_mask; | |
8733068b | 1778 | port_word_bit = xe->port / 32; |
14243b38 DW |
1779 | } |
1780 | ||
1781 | /* | |
1782 | * If this port wasn't already set, and if it isn't masked, then | |
1783 | * we try to set the corresponding bit in the in-kernel shadow of | |
1784 | * evtchn_pending_sel for the target vCPU. And if *that* wasn't | |
1785 | * already set, then we kick the vCPU in question to write to the | |
1786 | * *real* evtchn_pending_sel in its own guest vcpu_info struct. | |
1787 | */ | |
8733068b | 1788 | if (test_and_set_bit(xe->port, pending_bits)) { |
14243b38 | 1789 | rc = 0; /* It was already raised */ |
8733068b DW |
1790 | } else if (test_bit(xe->port, mask_bits)) { |
1791 | rc = -ENOTCONN; /* Masked */ | |
1a65105a | 1792 | kvm_xen_check_poller(vcpu, xe->port); |
14243b38 | 1793 | } else { |
7caf9571 DW |
1794 | rc = 1; /* Delivered to the bitmap in shared_info. */ |
1795 | /* Now switch to the vCPU's vcpu_info to set the index and pending_sel */ | |
1796 | read_unlock_irqrestore(&gpc->lock, flags); | |
1797 | gpc = &vcpu->arch.xen.vcpu_info_cache; | |
1798 | ||
1799 | read_lock_irqsave(&gpc->lock, flags); | |
58f5ee5f | 1800 | if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { |
7caf9571 DW |
1801 | /* |
1802 | * Could not access the vcpu_info. Set the bit in-kernel | |
1803 | * and prod the vCPU to deliver it for itself. | |
1804 | */ | |
1805 | if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel)) | |
1806 | kick_vcpu = true; | |
1807 | goto out_rcu; | |
1808 | } | |
1809 | ||
1810 | if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { | |
1811 | struct vcpu_info *vcpu_info = gpc->khva; | |
1812 | if (!test_and_set_bit(port_word_bit, &vcpu_info->evtchn_pending_sel)) { | |
1813 | WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1); | |
1814 | kick_vcpu = true; | |
1815 | } | |
1816 | } else { | |
1817 | struct compat_vcpu_info *vcpu_info = gpc->khva; | |
1818 | if (!test_and_set_bit(port_word_bit, | |
1819 | (unsigned long *)&vcpu_info->evtchn_pending_sel)) { | |
1820 | WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1); | |
1821 | kick_vcpu = true; | |
1822 | } | |
1823 | } | |
fde0451b DW |
1824 | |
1825 | /* For the per-vCPU lapic vector, deliver it as MSI. */ | |
1826 | if (kick_vcpu && vcpu->arch.xen.upcall_vector) { | |
1827 | kvm_xen_inject_vcpu_vector(vcpu); | |
1828 | kick_vcpu = false; | |
1829 | } | |
14243b38 DW |
1830 | } |
1831 | ||
1832 | out_rcu: | |
14243b38 | 1833 | read_unlock_irqrestore(&gpc->lock, flags); |
7caf9571 | 1834 | srcu_read_unlock(&kvm->srcu, idx); |
14243b38 DW |
1835 | |
1836 | if (kick_vcpu) { | |
7caf9571 | 1837 | kvm_make_request(KVM_REQ_UNBLOCK, vcpu); |
14243b38 DW |
1838 | kvm_vcpu_kick(vcpu); |
1839 | } | |
1840 | ||
1841 | return rc; | |
1842 | } | |
1843 | ||
8733068b | 1844 | static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) |
14243b38 DW |
1845 | { |
1846 | bool mm_borrowed = false; | |
1847 | int rc; | |
1848 | ||
8733068b | 1849 | rc = kvm_xen_set_evtchn_fast(xe, kvm); |
14243b38 DW |
1850 | if (rc != -EWOULDBLOCK) |
1851 | return rc; | |
1852 | ||
1853 | if (current->mm != kvm->mm) { | |
1854 | /* | |
1855 | * If not on a thread which already belongs to this KVM, | |
1856 | * we'd better be in the irqfd workqueue. | |
1857 | */ | |
1858 | if (WARN_ON_ONCE(current->mm)) | |
1859 | return -EINVAL; | |
1860 | ||
1861 | kthread_use_mm(kvm->mm); | |
1862 | mm_borrowed = true; | |
1863 | } | |
1864 | ||
14243b38 DW |
1865 | /* |
1866 | * It is theoretically possible for the page to be unmapped | |
1867 | * and the MMU notifier to invalidate the shared_info before | |
1868 | * we even get to use it. In that case, this looks like an | |
1869 | * infinite loop. It was tempting to do it via the userspace | |
1870 | * HVA instead... but that just *hides* the fact that it's | |
1871 | * an infinite loop, because if a fault occurs and it waits | |
1872 | * for the page to come back, it can *still* immediately | |
1873 | * fault and have to wait again, repeatedly. | |
1874 | * | |
1875 | * Conversely, the page could also have been reinstated by | |
1876 | * another thread before we even obtain the mutex above, so | |
1877 | * check again *first* before remapping it. | |
1878 | */ | |
1879 | do { | |
1880 | struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; | |
1881 | int idx; | |
1882 | ||
8733068b | 1883 | rc = kvm_xen_set_evtchn_fast(xe, kvm); |
14243b38 DW |
1884 | if (rc != -EWOULDBLOCK) |
1885 | break; | |
1886 | ||
1887 | idx = srcu_read_lock(&kvm->srcu); | |
58f5ee5f | 1888 | rc = kvm_gpc_refresh(gpc, PAGE_SIZE); |
14243b38 DW |
1889 | srcu_read_unlock(&kvm->srcu, idx); |
1890 | } while(!rc); | |
1891 | ||
14243b38 DW |
1892 | if (mm_borrowed) |
1893 | kthread_unuse_mm(kvm->mm); | |
1894 | ||
1895 | return rc; | |
1896 | } | |
1897 | ||
8733068b DW |
1898 | /* This is the version called from kvm_set_irq() as the .set function */ |
1899 | static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, | |
1900 | int irq_source_id, int level, bool line_status) | |
1901 | { | |
1902 | if (!level) | |
1903 | return -EINVAL; | |
1904 | ||
1905 | return kvm_xen_set_evtchn(&e->xen_evtchn, kvm); | |
1906 | } | |
1907 | ||
1908 | /* | |
1909 | * Set up an event channel interrupt from the KVM IRQ routing table. | |
1910 | * Used for e.g. PIRQ from passed through physical devices. | |
1911 | */ | |
14243b38 DW |
1912 | int kvm_xen_setup_evtchn(struct kvm *kvm, |
1913 | struct kvm_kernel_irq_routing_entry *e, | |
1914 | const struct kvm_irq_routing_entry *ue) | |
1915 | ||
1916 | { | |
8733068b DW |
1917 | struct kvm_vcpu *vcpu; |
1918 | ||
14243b38 DW |
1919 | if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm)) |
1920 | return -EINVAL; | |
1921 | ||
1922 | /* We only support 2 level event channels for now */ | |
1923 | if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) | |
1924 | return -EINVAL; | |
1925 | ||
8733068b DW |
1926 | /* |
1927 | * Xen gives us interesting mappings from vCPU index to APIC ID, | |
1928 | * which means kvm_get_vcpu_by_id() has to iterate over all vCPUs | |
1929 | * to find it. Do that once at setup time, instead of every time. | |
1930 | * But beware that on live update / live migration, the routing | |
1931 | * table might be reinstated before the vCPU threads have finished | |
1932 | * recreating their vCPUs. | |
1933 | */ | |
1934 | vcpu = kvm_get_vcpu_by_id(kvm, ue->u.xen_evtchn.vcpu); | |
1935 | if (vcpu) | |
79f772b9 | 1936 | e->xen_evtchn.vcpu_idx = vcpu->vcpu_idx; |
8733068b DW |
1937 | else |
1938 | e->xen_evtchn.vcpu_idx = -1; | |
1939 | ||
14243b38 | 1940 | e->xen_evtchn.port = ue->u.xen_evtchn.port; |
8733068b | 1941 | e->xen_evtchn.vcpu_id = ue->u.xen_evtchn.vcpu; |
14243b38 DW |
1942 | e->xen_evtchn.priority = ue->u.xen_evtchn.priority; |
1943 | e->set = evtchn_set_fn; | |
1944 | ||
1945 | return 0; | |
1946 | } | |
a795cd43 | 1947 | |
35025735 DW |
1948 | /* |
1949 | * Explicit event sending from userspace with KVM_XEN_HVM_EVTCHN_SEND ioctl. | |
1950 | */ | |
1951 | int kvm_xen_hvm_evtchn_send(struct kvm *kvm, struct kvm_irq_routing_xen_evtchn *uxe) | |
1952 | { | |
1953 | struct kvm_xen_evtchn e; | |
1954 | int ret; | |
1955 | ||
1956 | if (!uxe->port || uxe->port >= max_evtchn_port(kvm)) | |
1957 | return -EINVAL; | |
1958 | ||
1959 | /* We only support 2 level event channels for now */ | |
1960 | if (uxe->priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) | |
1961 | return -EINVAL; | |
1962 | ||
1963 | e.port = uxe->port; | |
1964 | e.vcpu_id = uxe->vcpu; | |
1965 | e.vcpu_idx = -1; | |
1966 | e.priority = uxe->priority; | |
1967 | ||
1968 | ret = kvm_xen_set_evtchn(&e, kvm); | |
1969 | ||
1970 | /* | |
1971 | * None of that 'return 1 if it actually got delivered' nonsense. | |
1972 | * We don't care if it was masked (-ENOTCONN) either. | |
1973 | */ | |
1974 | if (ret > 0 || ret == -ENOTCONN) | |
1975 | ret = 0; | |
1976 | ||
1977 | return ret; | |
1978 | } | |
1979 | ||
2fd6df2f JM |
1980 | /* |
1981 | * Support for *outbound* event channel events via the EVTCHNOP_send hypercall. | |
1982 | */ | |
1983 | struct evtchnfd { | |
1984 | u32 send_port; | |
1985 | u32 type; | |
1986 | union { | |
1987 | struct kvm_xen_evtchn port; | |
1988 | struct { | |
1989 | u32 port; /* zero */ | |
1990 | struct eventfd_ctx *ctx; | |
1991 | } eventfd; | |
1992 | } deliver; | |
1993 | }; | |
1994 | ||
1995 | /* | |
1996 | * Update target vCPU or priority for a registered sending channel. | |
1997 | */ | |
1998 | static int kvm_xen_eventfd_update(struct kvm *kvm, | |
1999 | struct kvm_xen_hvm_attr *data) | |
2000 | { | |
2001 | u32 port = data->u.evtchn.send_port; | |
2002 | struct evtchnfd *evtchnfd; | |
70eae030 | 2003 | int ret; |
2fd6df2f | 2004 | |
70eae030 | 2005 | /* Protect writes to evtchnfd as well as the idr lookup. */ |
310bc395 | 2006 | mutex_lock(&kvm->arch.xen.xen_lock); |
2fd6df2f | 2007 | evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port); |
2fd6df2f | 2008 | |
70eae030 | 2009 | ret = -ENOENT; |
2fd6df2f | 2010 | if (!evtchnfd) |
70eae030 | 2011 | goto out_unlock; |
2fd6df2f JM |
2012 | |
2013 | /* For an UPDATE, nothing may change except the priority/vcpu */ | |
70eae030 | 2014 | ret = -EINVAL; |
2fd6df2f | 2015 | if (evtchnfd->type != data->u.evtchn.type) |
70eae030 | 2016 | goto out_unlock; |
2fd6df2f JM |
2017 | |
2018 | /* | |
2019 | * Port cannot change, and if it's zero that was an eventfd | |
2020 | * which can't be changed either. | |
2021 | */ | |
2022 | if (!evtchnfd->deliver.port.port || | |
2023 | evtchnfd->deliver.port.port != data->u.evtchn.deliver.port.port) | |
70eae030 | 2024 | goto out_unlock; |
2fd6df2f JM |
2025 | |
2026 | /* We only support 2 level event channels for now */ | |
2027 | if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) | |
70eae030 | 2028 | goto out_unlock; |
2fd6df2f | 2029 | |
2fd6df2f JM |
2030 | evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority; |
2031 | if (evtchnfd->deliver.port.vcpu_id != data->u.evtchn.deliver.port.vcpu) { | |
2032 | evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu; | |
2033 | evtchnfd->deliver.port.vcpu_idx = -1; | |
2034 | } | |
70eae030 PB |
2035 | ret = 0; |
2036 | out_unlock: | |
310bc395 | 2037 | mutex_unlock(&kvm->arch.xen.xen_lock); |
70eae030 | 2038 | return ret; |
2fd6df2f JM |
2039 | } |
2040 | ||
2041 | /* | |
2042 | * Configure the target (eventfd or local port delivery) for sending on | |
2043 | * a given event channel. | |
2044 | */ | |
2045 | static int kvm_xen_eventfd_assign(struct kvm *kvm, | |
2046 | struct kvm_xen_hvm_attr *data) | |
2047 | { | |
2048 | u32 port = data->u.evtchn.send_port; | |
2049 | struct eventfd_ctx *eventfd = NULL; | |
1c14faa5 | 2050 | struct evtchnfd *evtchnfd; |
2fd6df2f JM |
2051 | int ret = -EINVAL; |
2052 | ||
2fd6df2f JM |
2053 | evtchnfd = kzalloc(sizeof(struct evtchnfd), GFP_KERNEL); |
2054 | if (!evtchnfd) | |
2055 | return -ENOMEM; | |
2056 | ||
2057 | switch(data->u.evtchn.type) { | |
2058 | case EVTCHNSTAT_ipi: | |
2059 | /* IPI must map back to the same port# */ | |
2060 | if (data->u.evtchn.deliver.port.port != data->u.evtchn.send_port) | |
73536338 | 2061 | goto out_noeventfd; /* -EINVAL */ |
2fd6df2f JM |
2062 | break; |
2063 | ||
2064 | case EVTCHNSTAT_interdomain: | |
2065 | if (data->u.evtchn.deliver.port.port) { | |
2066 | if (data->u.evtchn.deliver.port.port >= max_evtchn_port(kvm)) | |
73536338 | 2067 | goto out_noeventfd; /* -EINVAL */ |
2fd6df2f JM |
2068 | } else { |
2069 | eventfd = eventfd_ctx_fdget(data->u.evtchn.deliver.eventfd.fd); | |
2070 | if (IS_ERR(eventfd)) { | |
2071 | ret = PTR_ERR(eventfd); | |
73536338 | 2072 | goto out_noeventfd; |
2fd6df2f JM |
2073 | } |
2074 | } | |
2075 | break; | |
2076 | ||
2077 | case EVTCHNSTAT_virq: | |
2078 | case EVTCHNSTAT_closed: | |
2079 | case EVTCHNSTAT_unbound: | |
2080 | case EVTCHNSTAT_pirq: | |
2081 | default: /* Unknown event channel type */ | |
2082 | goto out; /* -EINVAL */ | |
2083 | } | |
2084 | ||
2085 | evtchnfd->send_port = data->u.evtchn.send_port; | |
2086 | evtchnfd->type = data->u.evtchn.type; | |
2087 | if (eventfd) { | |
2088 | evtchnfd->deliver.eventfd.ctx = eventfd; | |
2089 | } else { | |
2090 | /* We only support 2 level event channels for now */ | |
2091 | if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) | |
2092 | goto out; /* -EINVAL; */ | |
2093 | ||
2094 | evtchnfd->deliver.port.port = data->u.evtchn.deliver.port.port; | |
2095 | evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu; | |
2096 | evtchnfd->deliver.port.vcpu_idx = -1; | |
2097 | evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority; | |
2098 | } | |
2099 | ||
310bc395 | 2100 | mutex_lock(&kvm->arch.xen.xen_lock); |
2fd6df2f JM |
2101 | ret = idr_alloc(&kvm->arch.xen.evtchn_ports, evtchnfd, port, port + 1, |
2102 | GFP_KERNEL); | |
310bc395 | 2103 | mutex_unlock(&kvm->arch.xen.xen_lock); |
2fd6df2f JM |
2104 | if (ret >= 0) |
2105 | return 0; | |
2106 | ||
2107 | if (ret == -ENOSPC) | |
2108 | ret = -EEXIST; | |
2109 | out: | |
2110 | if (eventfd) | |
2111 | eventfd_ctx_put(eventfd); | |
73536338 | 2112 | out_noeventfd: |
2fd6df2f JM |
2113 | kfree(evtchnfd); |
2114 | return ret; | |
2115 | } | |
2116 | ||
2117 | static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port) | |
2118 | { | |
2119 | struct evtchnfd *evtchnfd; | |
2120 | ||
310bc395 | 2121 | mutex_lock(&kvm->arch.xen.xen_lock); |
2fd6df2f | 2122 | evtchnfd = idr_remove(&kvm->arch.xen.evtchn_ports, port); |
310bc395 | 2123 | mutex_unlock(&kvm->arch.xen.xen_lock); |
2fd6df2f JM |
2124 | |
2125 | if (!evtchnfd) | |
2126 | return -ENOENT; | |
2127 | ||
70eae030 | 2128 | synchronize_srcu(&kvm->srcu); |
2fd6df2f JM |
2129 | if (!evtchnfd->deliver.port.port) |
2130 | eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx); | |
2131 | kfree(evtchnfd); | |
2132 | return 0; | |
2133 | } | |
2134 | ||
2135 | static int kvm_xen_eventfd_reset(struct kvm *kvm) | |
2136 | { | |
a79b53aa | 2137 | struct evtchnfd *evtchnfd, **all_evtchnfds; |
2fd6df2f | 2138 | int i; |
a79b53aa | 2139 | int n = 0; |
2fd6df2f | 2140 | |
310bc395 | 2141 | mutex_lock(&kvm->arch.xen.xen_lock); |
a79b53aa PB |
2142 | |
2143 | /* | |
2144 | * Because synchronize_srcu() cannot be called inside the | |
2145 | * critical section, first collect all the evtchnfd objects | |
2146 | * in an array as they are removed from evtchn_ports. | |
2147 | */ | |
2148 | idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) | |
2149 | n++; | |
2150 | ||
2151 | all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL); | |
2152 | if (!all_evtchnfds) { | |
310bc395 | 2153 | mutex_unlock(&kvm->arch.xen.xen_lock); |
a79b53aa PB |
2154 | return -ENOMEM; |
2155 | } | |
2156 | ||
2157 | n = 0; | |
2fd6df2f | 2158 | idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) { |
a79b53aa | 2159 | all_evtchnfds[n++] = evtchnfd; |
2fd6df2f | 2160 | idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port); |
a79b53aa | 2161 | } |
310bc395 | 2162 | mutex_unlock(&kvm->arch.xen.xen_lock); |
a79b53aa PB |
2163 | |
2164 | synchronize_srcu(&kvm->srcu); | |
2165 | ||
2166 | while (n--) { | |
2167 | evtchnfd = all_evtchnfds[n]; | |
2fd6df2f JM |
2168 | if (!evtchnfd->deliver.port.port) |
2169 | eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx); | |
2170 | kfree(evtchnfd); | |
2171 | } | |
a79b53aa | 2172 | kfree(all_evtchnfds); |
2fd6df2f JM |
2173 | |
2174 | return 0; | |
2175 | } | |
2176 | ||
2177 | static int kvm_xen_setattr_evtchn(struct kvm *kvm, struct kvm_xen_hvm_attr *data) | |
2178 | { | |
2179 | u32 port = data->u.evtchn.send_port; | |
2180 | ||
2181 | if (data->u.evtchn.flags == KVM_XEN_EVTCHN_RESET) | |
2182 | return kvm_xen_eventfd_reset(kvm); | |
2183 | ||
2184 | if (!port || port >= max_evtchn_port(kvm)) | |
2185 | return -EINVAL; | |
2186 | ||
2187 | if (data->u.evtchn.flags == KVM_XEN_EVTCHN_DEASSIGN) | |
2188 | return kvm_xen_eventfd_deassign(kvm, port); | |
2189 | if (data->u.evtchn.flags == KVM_XEN_EVTCHN_UPDATE) | |
2190 | return kvm_xen_eventfd_update(kvm, data); | |
2191 | if (data->u.evtchn.flags) | |
2192 | return -EINVAL; | |
2193 | ||
2194 | return kvm_xen_eventfd_assign(kvm, data); | |
2195 | } | |
2196 | ||
2197 | static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r) | |
2198 | { | |
2199 | struct evtchnfd *evtchnfd; | |
2200 | struct evtchn_send send; | |
92c58965 | 2201 | struct x86_exception e; |
2fd6df2f | 2202 | |
92c58965 DW |
2203 | /* Sanity check: this structure is the same for 32-bit and 64-bit */ |
2204 | BUILD_BUG_ON(sizeof(send) != 4); | |
92c58965 | 2205 | if (kvm_read_guest_virt(vcpu, param, &send, sizeof(send), &e)) { |
2fd6df2f JM |
2206 | *r = -EFAULT; |
2207 | return true; | |
2208 | } | |
2209 | ||
70eae030 PB |
2210 | /* |
2211 | * evtchnfd is protected by kvm->srcu; the idr lookup instead | |
2212 | * is protected by RCU. | |
2213 | */ | |
2214 | rcu_read_lock(); | |
2fd6df2f | 2215 | evtchnfd = idr_find(&vcpu->kvm->arch.xen.evtchn_ports, send.port); |
70eae030 | 2216 | rcu_read_unlock(); |
2fd6df2f JM |
2217 | if (!evtchnfd) |
2218 | return false; | |
2219 | ||
2220 | if (evtchnfd->deliver.port.port) { | |
2221 | int ret = kvm_xen_set_evtchn(&evtchnfd->deliver.port, vcpu->kvm); | |
2222 | if (ret < 0 && ret != -ENOTCONN) | |
2223 | return false; | |
2224 | } else { | |
3652117f | 2225 | eventfd_signal(evtchnfd->deliver.eventfd.ctx); |
2fd6df2f JM |
2226 | } |
2227 | ||
2228 | *r = 0; | |
2229 | return true; | |
2230 | } | |
2231 | ||
942c2490 DW |
2232 | void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu) |
2233 | { | |
2234 | vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx; | |
1a65105a | 2235 | vcpu->arch.xen.poll_evtchn = 0; |
52491a38 | 2236 | |
1a65105a | 2237 | timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0); |
52491a38 | 2238 | |
a4bff3df PD |
2239 | kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm); |
2240 | kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm); | |
2241 | kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm); | |
2242 | kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm); | |
942c2490 DW |
2243 | } |
2244 | ||
a795cd43 DW |
2245 | void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) |
2246 | { | |
53639526 JM |
2247 | if (kvm_xen_timer_enabled(vcpu)) |
2248 | kvm_xen_stop_timer(vcpu); | |
2249 | ||
8c82a0b3 ML |
2250 | kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache); |
2251 | kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); | |
2252 | kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); | |
2253 | kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); | |
52491a38 | 2254 | |
1a65105a | 2255 | del_timer_sync(&vcpu->arch.xen.poll_timer); |
a795cd43 | 2256 | } |
2fd6df2f | 2257 | |
f422f853 PD |
2258 | void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu) |
2259 | { | |
2260 | struct kvm_cpuid_entry2 *entry; | |
2261 | u32 function; | |
2262 | ||
2263 | if (!vcpu->arch.xen.cpuid.base) | |
2264 | return; | |
2265 | ||
2266 | function = vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3); | |
2267 | if (function > vcpu->arch.xen.cpuid.limit) | |
2268 | return; | |
2269 | ||
2270 | entry = kvm_find_cpuid_entry_index(vcpu, function, 1); | |
2271 | if (entry) { | |
2272 | entry->ecx = vcpu->arch.hv_clock.tsc_to_system_mul; | |
2273 | entry->edx = vcpu->arch.hv_clock.tsc_shift; | |
2274 | } | |
2275 | ||
2276 | entry = kvm_find_cpuid_entry_index(vcpu, function, 2); | |
2277 | if (entry) | |
2278 | entry->eax = vcpu->arch.hw_tsc_khz; | |
2279 | } | |
2280 | ||
2fd6df2f JM |
2281 | void kvm_xen_init_vm(struct kvm *kvm) |
2282 | { | |
310bc395 | 2283 | mutex_init(&kvm->arch.xen.xen_lock); |
2fd6df2f | 2284 | idr_init(&kvm->arch.xen.evtchn_ports); |
a4bff3df | 2285 | kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm); |
2fd6df2f JM |
2286 | } |
2287 | ||
2288 | void kvm_xen_destroy_vm(struct kvm *kvm) | |
2289 | { | |
2290 | struct evtchnfd *evtchnfd; | |
2291 | int i; | |
2292 | ||
8c82a0b3 | 2293 | kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache); |
2fd6df2f JM |
2294 | |
2295 | idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) { | |
2296 | if (!evtchnfd->deliver.port.port) | |
2297 | eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx); | |
2298 | kfree(evtchnfd); | |
2299 | } | |
2300 | idr_destroy(&kvm->arch.xen.evtchn_ports); | |
2301 | ||
2302 | if (kvm->arch.xen_hvm_config.msr) | |
2303 | static_branch_slow_dec_deferred(&kvm_xen_enabled); | |
2304 | } |