// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/hashtable.h>
#include <linux/amd-iommu.h>
#include <linux/kvm_host.h>

#include <asm/irq_remapping.h>

#include "trace.h"
#include "lapic.h"
#include "x86.h"
#include "irq.h"
#include "svm.h"

/* AVIC GATAG is encoded using VM and VCPU IDs */
#define AVIC_VCPU_ID_BITS		8
#define AVIC_VCPU_ID_MASK		((1 << AVIC_VCPU_ID_BITS) - 1)

#define AVIC_VM_ID_BITS			24
#define AVIC_VM_ID_NR			(1 << AVIC_VM_ID_BITS)
#define AVIC_VM_ID_MASK			((1 << AVIC_VM_ID_BITS) - 1)

#define AVIC_GATAG(x, y)		((((x) & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
					 ((y) & AVIC_VCPU_ID_MASK))
#define AVIC_GATAG_TO_VMID(x)		(((x) >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x)		((x) & AVIC_VCPU_ID_MASK)
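
/*
 * For example, with vm_id = 0x123456 and vcpu_id = 0x78, AVIC_GATAG()
 * yields 0x12345678; AVIC_GATAG_TO_VMID() recovers 0x123456 and
 * AVIC_GATAG_TO_VCPUID() recovers 0x78.
 */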

static bool force_avic;
module_param_unsafe(force_avic, bool, 0444);

/* Note:
 * This hash table is used to map VM_ID to a struct kvm_svm,
 * when handling AMD IOMMU GALOG notifications to schedule in
 * a particular vCPU.
 */
#define SVM_VM_DATA_HASH_BITS	8
static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
bool x2avic_enabled;

/*
 * This is a wrapper of struct amd_iommu_ir_data.
 */
struct amd_svm_iommu_ir {
	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
	void *data;		/* Storing pointer to struct amd_ir_data */
};
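
/*
 * Entries are added to a vCPU's ir_list by svm_ir_list_add() when an IRTE
 * is programmed for guest-mode (posted) delivery, and removed again by
 * svm_ir_list_del(); see avic_pi_update_irte() below.
 */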

static void avic_activate_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;

	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;

	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;

	/*
	 * Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR
	 * accesses, while interrupt injection to a running vCPU can be
	 * achieved using the AVIC doorbell.  KVM disables the APIC access
	 * page (deletes the memslot) if any vCPU has x2APIC enabled, thus
	 * enabling AVIC in hybrid mode activates only the doorbell mechanism.
	 */
	if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
		vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
		/* Disabling MSR intercept for x2APIC registers */
		svm_set_x2apic_msr_interception(svm, false);
	} else {
		/*
		 * Flush the TLB, the guest may have inserted a non-APIC
		 * mapping into the TLB while AVIC was disabled.
		 */
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);

		/* For xAVIC and hybrid-xAVIC modes */
		vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
		/* Enabling MSR intercept for x2APIC registers */
		svm_set_x2apic_msr_interception(svm, true);
	}
}

static void avic_deactivate_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;

	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;

	/*
	 * If running nested and the guest uses its own MSR bitmap, there
	 * is no need to update L0's msr bitmap.
	 */
	if (is_guest_mode(&svm->vcpu) &&
	    vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))
		return;

	/* Enabling MSR intercept for x2APIC registers */
	svm_set_x2apic_msr_interception(svm, true);
}

/* Note:
 * This function is called from the IOMMU driver to notify
 * SVM to schedule in a particular vCPU of a particular VM.
 */
int avic_ga_log_notifier(u32 ga_tag)
{
	unsigned long flags;
	struct kvm_svm *kvm_svm;
	struct kvm_vcpu *vcpu = NULL;
	u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
	u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);

	pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
	trace_kvm_avic_ga_log(vm_id, vcpu_id);

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
	hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
		if (kvm_svm->avic_vm_id != vm_id)
			continue;
		vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
		break;
	}
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

	/* Note:
	 * At this point, the IOMMU should have already set the pending
	 * bit in the vAPIC backing page.  So, we just need to schedule
	 * in the vCPU.
	 */
	if (vcpu)
		kvm_vcpu_wake_up(vcpu);

	return 0;
}

void avic_vm_destroy(struct kvm *kvm)
{
	unsigned long flags;
	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);

	if (!enable_apicv)
		return;

	if (kvm_svm->avic_logical_id_table_page)
		__free_page(kvm_svm->avic_logical_id_table_page);
	if (kvm_svm->avic_physical_id_table_page)
		__free_page(kvm_svm->avic_physical_id_table_page);

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
	hash_del(&kvm_svm->hnode);
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}

int avic_vm_init(struct kvm *kvm)
{
	unsigned long flags;
	int err = -ENOMEM;
	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
	struct kvm_svm *k2;
	struct page *p_page;
	struct page *l_page;
	u32 vm_id;

	if (!enable_apicv)
		return 0;

	/* Allocating physical APIC ID table (4KB) */
	p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!p_page)
		goto free_avic;

	kvm_svm->avic_physical_id_table_page = p_page;

	/* Allocating logical APIC ID table (4KB) */
	l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!l_page)
		goto free_avic;

	kvm_svm->avic_logical_id_table_page = l_page;

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
again:
	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
	if (vm_id == 0) { /* id is 1-based, zero is not okay */
		next_vm_id_wrapped = 1;
		goto again;
	}
	/* Is it still in use? Only possible if wrapped at least once */
	if (next_vm_id_wrapped) {
		hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
			if (k2->avic_vm_id == vm_id)
				goto again;
		}
	}
	kvm_svm->avic_vm_id = vm_id;
	hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

	return 0;

free_avic:
	avic_vm_destroy(kvm);
	return err;
}

void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
{
	struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
	phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
	phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
	phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));

	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
	vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
	vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE & VMCB_AVIC_APIC_BAR_MASK;

	if (kvm_apicv_activated(svm->vcpu.kvm))
		avic_activate_vmcb(svm);
	else
		avic_deactivate_vmcb(svm);
}

static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
				       unsigned int index)
{
	u64 *avic_physical_id_table;
	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);

	if ((!x2avic_enabled && index > AVIC_MAX_PHYSICAL_ID) ||
	    (index > X2AVIC_MAX_PHYSICAL_ID))
		return NULL;

	avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);

	return &avic_physical_id_table[index];
}

static int avic_init_backing_page(struct kvm_vcpu *vcpu)
{
	u64 *entry, new_entry;
	int id = vcpu->vcpu_id;
	struct vcpu_svm *svm = to_svm(vcpu);

	if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
	    (id > X2AVIC_MAX_PHYSICAL_ID))
		return -EINVAL;

	if (!vcpu->arch.apic->regs)
		return -EINVAL;

	if (kvm_apicv_activated(vcpu->kvm)) {
		int ret;

		/*
		 * Note, AVIC hardware walks the nested page table to check
		 * permissions, but does not use the SPA address specified in
		 * the leaf SPTE since it uses the address in the
		 * AVIC_BACKING_PAGE pointer field of the VMCB.
		 */
		ret = kvm_alloc_apic_access_page(vcpu->kvm);
		if (ret)
			return ret;
	}

	svm->avic_backing_page = virt_to_page(vcpu->arch.apic->regs);

	/* Setting AVIC backing page address in the phy APIC ID table */
	entry = avic_get_physical_id_entry(vcpu, id);
	if (!entry)
		return -EINVAL;
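
	/*
	 * Sketch of the 64-bit entry layout, per the APM: bits 51:12 hold the
	 * backing page pointer, the low bits hold the host physical APIC ID,
	 * bit 62 is IsRunning and bit 63 is Valid.  The *_MASK definitions in
	 * svm.h are authoritative.
	 */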
	new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
			       AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
			      AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
	WRITE_ONCE(*entry, new_entry);

	svm->avic_physical_id_cache = entry;

	return 0;
}

void avic_ring_doorbell(struct kvm_vcpu *vcpu)
{
	/*
	 * Note, the vCPU could get migrated to a different pCPU at any point,
	 * which could result in signalling the wrong/previous pCPU.  But if
	 * that happens the vCPU is guaranteed to do a VMRUN (after being
	 * migrated) and thus will process pending interrupts, i.e. a doorbell
	 * is not needed (and the spurious one is harmless).
	 */
	int cpu = READ_ONCE(vcpu->cpu);

	if (cpu != get_cpu()) {
		wrmsrl(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
		trace_kvm_avic_doorbell(vcpu->vcpu_id, kvm_cpu_get_apicid(cpu));
	}
	put_cpu();
}

static void avic_kick_vcpu(struct kvm_vcpu *vcpu, u32 icrl)
{
	vcpu->arch.apic->irr_pending = true;
	svm_complete_interrupt_delivery(vcpu,
					icrl & APIC_MODE_MASK,
					icrl & APIC_INT_LEVELTRIG,
					icrl & APIC_VECTOR_MASK);
}

static void avic_kick_vcpu_by_physical_id(struct kvm *kvm, u32 physical_id,
					  u32 icrl)
{
	/*
	 * KVM inhibits AVIC if any vCPU ID diverges from the vCPU's APIC ID,
	 * i.e. here APIC ID == vCPU ID.
	 */
	struct kvm_vcpu *target_vcpu = kvm_get_vcpu_by_id(kvm, physical_id);

	/* Nothing to do if the target vCPU doesn't exist. */
	if (unlikely(!target_vcpu))
		return;

	avic_kick_vcpu(target_vcpu, icrl);
}

static void avic_kick_vcpu_by_logical_id(struct kvm *kvm, u32 *avic_logical_id_table,
					 u32 logid_index, u32 icrl)
{
	u32 physical_id;

	if (avic_logical_id_table) {
		u32 logid_entry = avic_logical_id_table[logid_index];

		/* Nothing to do if the logical destination is invalid. */
		if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
			return;

		physical_id = logid_entry &
			      AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
	} else {
		/*
		 * For x2APIC, the logical APIC ID is a read-only value that is
		 * derived from the x2APIC ID, thus the x2APIC ID can be found
		 * by reversing the calculation (stored in logid_index).  Note,
		 * bits 31:20 of the x2APIC ID aren't propagated to the logical
		 * ID, but KVM limits the x2APIC ID to KVM_MAX_VCPU_IDS.
		 */
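		/*
		 * For example, x2APIC ID 0x21 has logical ID cluster 0x2,
		 * bit 1; the caller passes logid_index = (0x2 << 4) + 1 =
		 * 0x21, i.e. the x2APIC (and thus physical) ID itself.
		 */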
		physical_id = logid_index;
	}

	avic_kick_vcpu_by_physical_id(kvm, physical_id, icrl);
}

/*
 * A fast-path version of avic_kick_target_vcpus(), which attempts to match
 * destination APIC ID to vCPU without looping through all vCPUs.
 */
static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
				       u32 icrl, u32 icrh, u32 index)
{
	int dest_mode = icrl & APIC_DEST_MASK;
	int shorthand = icrl & APIC_SHORT_MASK;
	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
	u32 dest;

	if (shorthand != APIC_DEST_NOSHORT)
		return -EINVAL;

	if (apic_x2apic_mode(source))
		dest = icrh;
	else
		dest = GET_XAPIC_DEST_FIELD(icrh);

	if (dest_mode == APIC_DEST_PHYSICAL) {
		/* broadcast destination, use slow path */
		if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST)
			return -EINVAL;
		if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST)
			return -EINVAL;

		if (WARN_ON_ONCE(dest != index))
			return -EINVAL;

		avic_kick_vcpu_by_physical_id(kvm, dest, icrl);
	} else {
		u32 *avic_logical_id_table;
		unsigned long bitmap, i;
		u32 cluster;

		if (apic_x2apic_mode(source)) {
			/* 16 bit dest mask, 16 bit cluster id */
			bitmap = dest & 0xFFFF;
			cluster = (dest >> 16) << 4;
		} else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) {
			/* 8 bit dest mask */
			bitmap = dest;
			cluster = 0;
		} else {
			/* 4 bit dest mask, 4 bit cluster id */
			bitmap = dest & 0xF;
			cluster = (dest >> 4) << 2;
		}
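
		/*
		 * For example, in x2APIC logical mode icrh = 0x00030005
		 * selects cluster 3 with destination bits 0 and 2, i.e.
		 * bitmap = 0x5 and cluster = 0x30, so the vCPUs with x2APIC
		 * IDs 0x30 and 0x32 are kicked below.
		 */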

		/* Nothing to do if there are no destinations in the cluster. */
		if (unlikely(!bitmap))
			return 0;

		if (apic_x2apic_mode(source))
			avic_logical_id_table = NULL;
		else
			avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page);

		/*
		 * AVIC is inhibited if vCPUs aren't mapped 1:1 with logical
		 * IDs, thus each bit in the destination is guaranteed to map
		 * to at most one vCPU.
		 */
		for_each_set_bit(i, &bitmap, 16)
			avic_kick_vcpu_by_logical_id(kvm, avic_logical_id_table,
						     cluster + i, icrl);
	}

	return 0;
}

static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
				   u32 icrl, u32 icrh, u32 index)
{
	u32 dest = apic_x2apic_mode(source) ? icrh : GET_XAPIC_DEST_FIELD(icrh);
	unsigned long i;
	struct kvm_vcpu *vcpu;

	if (!avic_kick_target_vcpus_fast(kvm, source, icrl, icrh, index))
		return;

	trace_kvm_avic_kick_vcpu_slowpath(icrh, icrl, index);

	/*
	 * Wake any target vCPUs that are blocking, i.e. waiting for a wake
	 * event.  There's no need to signal doorbells, as hardware has handled
	 * vCPUs that were in guest at the time of the IPI, and vCPUs that have
	 * since entered the guest will have processed pending IRQs at VMRUN.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
					dest, icrl & APIC_DEST_MASK))
			avic_kick_vcpu(vcpu, icrl);
	}
}
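
/*
 * For AVIC_INCOMPLETE_IPI exits (a sketch, per the APM): exit_info_1 holds
 * the guest's ICR value (ICRH in the high 32 bits, ICRL in the low 32),
 * while exit_info_2 holds the failure reason in its high 32 bits and the
 * table index in its low bits (hence the 0x1FF mask below).
 */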
int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
	u32 icrl = svm->vmcb->control.exit_info_1;
	u32 id = svm->vmcb->control.exit_info_2 >> 32;
	u32 index = svm->vmcb->control.exit_info_2 & 0x1FF;
	struct kvm_lapic *apic = vcpu->arch.apic;

	trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);

	switch (id) {
	case AVIC_IPI_FAILURE_INVALID_TARGET:
	case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
		/*
		 * Emulate IPIs that are not handled by AVIC hardware, which
		 * only virtualizes Fixed, Edge-Triggered INTRs, and falls over
		 * if _any_ targets are invalid, e.g. if the logical mode mask
		 * is a superset of running vCPUs.
		 *
		 * The exit is a trap, i.e. ICR holds the correct value and RIP
		 * has been advanced, so KVM is responsible only for emulating
		 * the IPI.  Sadly, hardware may sometimes leave the BUSY flag
		 * set, in which case KVM needs to emulate the ICR write as
		 * well in order to clear the BUSY flag.
		 */
		if (icrl & APIC_ICR_BUSY)
			kvm_apic_write_nodecode(vcpu, APIC_ICR);
		else
			kvm_apic_send_ipi(apic, icrl, icrh);
		break;
	case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING:
		/*
		 * At this point, we expect that the AVIC HW has already
		 * set the appropriate IRR bits on the valid target
		 * vcpus.  So, we just need to kick the appropriate vcpu.
		 */
		avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
		break;
	case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
		WARN_ONCE(1, "Invalid backing page\n");
		break;
	default:
		pr_err("Unknown IPI interception\n");
	}

	return 1;
}

unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu)
{
	if (is_guest_mode(vcpu))
		return APICV_INHIBIT_REASON_NESTED;
	return 0;
}

static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
{
	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
	u32 *logical_apic_id_table;
	u32 cluster, index;

	ldr = GET_APIC_LOGICAL_ID(ldr);
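
	/*
	 * For example, in cluster mode a logical ID of 0x21 decodes to
	 * cluster 2 with in-cluster bit 0x1, so index = __ffs(0x1) +
	 * (2 << 2) = 8; the table packs four logical IDs per cluster.
	 */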
1808c950 SC |
535 | if (flat) { |
536 | cluster = 0; | |
537 | } else { | |
538 | cluster = (ldr >> 4); | |
539 | if (cluster >= 0xf) | |
ef0f6496 | 540 | return NULL; |
1808c950 | 541 | ldr &= 0xf; |
ef0f6496 | 542 | } |
1808c950 SC |
543 | if (!ldr || !is_power_of_2(ldr)) |
544 | return NULL; | |
545 | ||
546 | index = __ffs(ldr); | |
547 | if (WARN_ON_ONCE(index > 7)) | |
548 | return NULL; | |
549 | index += (cluster << 2); | |
ef0f6496 JR |
550 | |
551 | logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page); | |
552 | ||
553 | return &logical_apic_id_table[index]; | |
554 | } | |
555 | ||
4f160b7b | 556 | static void avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr) |
ef0f6496 JR |
557 | { |
558 | bool flat; | |
559 | u32 *entry, new_entry; | |
560 | ||
561 | flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT; | |
562 | entry = avic_get_logical_id_entry(vcpu, ldr, flat); | |
563 | if (!entry) | |
4f160b7b | 564 | return; |
ef0f6496 JR |
565 | |
566 | new_entry = READ_ONCE(*entry); | |
567 | new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; | |
568 | new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK); | |
569 | new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK; | |
570 | WRITE_ONCE(*entry, new_entry); | |
ef0f6496 JR |
571 | } |
572 | ||
573 | static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu) | |
574 | { | |
575 | struct vcpu_svm *svm = to_svm(vcpu); | |
576 | bool flat = svm->dfr_reg == APIC_DFR_FLAT; | |
ab1b1dc1 | 577 | u32 *entry; |
ef0f6496 | 578 | |
ab1b1dc1 SS |
579 | /* Note: x2AVIC does not use logical APIC ID table */ |
580 | if (apic_x2apic_mode(vcpu->arch.apic)) | |
581 | return; | |
582 | ||
583 | entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat); | |
ef0f6496 JR |
584 | if (entry) |
585 | clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry); | |
586 | } | |
587 | ||
1ba59a44 | 588 | static void avic_handle_ldr_update(struct kvm_vcpu *vcpu) |
ef0f6496 | 589 | { |
ef0f6496 JR |
590 | struct vcpu_svm *svm = to_svm(vcpu); |
591 | u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR); | |
592 | u32 id = kvm_xapic_id(vcpu->arch.apic); | |
593 | ||
ab1b1dc1 SS |
594 | /* AVIC does not support LDR update for x2APIC */ |
595 | if (apic_x2apic_mode(vcpu->arch.apic)) | |
1ba59a44 | 596 | return; |
ab1b1dc1 | 597 | |
ef0f6496 | 598 | if (ldr == svm->ldr_reg) |
1ba59a44 | 599 | return; |
ef0f6496 JR |
600 | |
601 | avic_invalidate_logical_id_entry(vcpu); | |
602 | ||
4f160b7b SC |
603 | svm->ldr_reg = ldr; |
604 | avic_ldr_write(vcpu, id, ldr); | |
ef0f6496 JR |
605 | } |
606 | ||
ef0f6496 JR |
607 | static void avic_handle_dfr_update(struct kvm_vcpu *vcpu) |
608 | { | |
609 | struct vcpu_svm *svm = to_svm(vcpu); | |
610 | u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); | |
611 | ||
612 | if (svm->dfr_reg == dfr) | |
613 | return; | |
614 | ||
615 | avic_invalidate_logical_id_entry(vcpu); | |
616 | svm->dfr_reg = dfr; | |
617 | } | |
618 | ||
ed60920e | 619 | static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu) |
ef0f6496 | 620 | { |
ed60920e | 621 | u32 offset = to_svm(vcpu)->vmcb->control.exit_info_1 & |
ef0f6496 JR |
622 | AVIC_UNACCEL_ACCESS_OFFSET_MASK; |
623 | ||
624 | switch (offset) { | |
ef0f6496 | 625 | case APIC_LDR: |
1ba59a44 | 626 | avic_handle_ldr_update(vcpu); |
ef0f6496 JR |
627 | break; |
628 | case APIC_DFR: | |
ed60920e | 629 | avic_handle_dfr_update(vcpu); |
ef0f6496 JR |
630 | break; |
631 | default: | |
632 | break; | |
633 | } | |
634 | ||
ed60920e | 635 | kvm_apic_write_nodecode(vcpu, offset); |
ef0f6496 JR |
636 | return 1; |
637 | } | |
638 | ||
639 | static bool is_avic_unaccelerated_access_trap(u32 offset) | |
640 | { | |
641 | bool ret = false; | |
642 | ||
643 | switch (offset) { | |
644 | case APIC_ID: | |
645 | case APIC_EOI: | |
646 | case APIC_RRR: | |
647 | case APIC_LDR: | |
648 | case APIC_DFR: | |
649 | case APIC_SPIV: | |
650 | case APIC_ESR: | |
651 | case APIC_ICR: | |
652 | case APIC_LVTT: | |
653 | case APIC_LVTTHMR: | |
654 | case APIC_LVTPC: | |
655 | case APIC_LVT0: | |
656 | case APIC_LVT1: | |
657 | case APIC_LVTERR: | |
658 | case APIC_TMICT: | |
659 | case APIC_TDCR: | |
660 | ret = true; | |
661 | break; | |
662 | default: | |
663 | break; | |
664 | } | |
665 | return ret; | |
666 | } | |
667 | ||
63129754 | 668 | int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu) |
ef0f6496 | 669 | { |
63129754 | 670 | struct vcpu_svm *svm = to_svm(vcpu); |
ef0f6496 JR |
671 | int ret = 0; |
672 | u32 offset = svm->vmcb->control.exit_info_1 & | |
673 | AVIC_UNACCEL_ACCESS_OFFSET_MASK; | |
674 | u32 vector = svm->vmcb->control.exit_info_2 & | |
675 | AVIC_UNACCEL_ACCESS_VECTOR_MASK; | |
676 | bool write = (svm->vmcb->control.exit_info_1 >> 32) & | |
677 | AVIC_UNACCEL_ACCESS_WRITE_MASK; | |
678 | bool trap = is_avic_unaccelerated_access_trap(offset); | |
679 | ||
63129754 | 680 | trace_kvm_avic_unaccelerated_access(vcpu->vcpu_id, offset, |
ef0f6496 JR |
681 | trap, write, vector); |
682 | if (trap) { | |
683 | /* Handling Trap */ | |
684 | WARN_ONCE(!write, "svm: Handling trap read.\n"); | |
ed60920e | 685 | ret = avic_unaccel_trap_write(vcpu); |
ef0f6496 JR |
686 | } else { |
687 | /* Handling Fault */ | |
63129754 | 688 | ret = kvm_emulate_instruction(vcpu, 0); |
ef0f6496 JR |
689 | } |
690 | ||
691 | return ret; | |
692 | } | |
693 | ||
694 | int avic_init_vcpu(struct vcpu_svm *svm) | |
695 | { | |
696 | int ret; | |
697 | struct kvm_vcpu *vcpu = &svm->vcpu; | |
698 | ||
fdf513e3 | 699 | if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm)) |
ef0f6496 JR |
700 | return 0; |
701 | ||
63129754 | 702 | ret = avic_init_backing_page(vcpu); |
ef0f6496 JR |
703 | if (ret) |
704 | return ret; | |
705 | ||
706 | INIT_LIST_HEAD(&svm->ir_list); | |
707 | spin_lock_init(&svm->ir_list_lock); | |
708 | svm->dfr_reg = APIC_DFR_FLAT; | |
709 | ||
710 | return ret; | |
711 | } | |
712 | ||
db6e7adf | 713 | void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu) |
ef0f6496 | 714 | { |
ef0f6496 JR |
715 | avic_handle_dfr_update(vcpu); |
716 | avic_handle_ldr_update(vcpu); | |
717 | } | |
718 | ||

static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

	/*
	 * Here, we go through the per-vcpu ir_list to update all existing
	 * interrupt remapping table entries targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	if (list_empty(&svm->ir_list))
		goto out;

	list_for_each_entry(ir, &svm->ir_list, node) {
		if (activate)
			ret = amd_iommu_activate_guest_mode(ir->data);
		else
			ret = amd_iommu_deactivate_guest_mode(ir->data);
		if (ret)
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	return ret;
}

static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
	unsigned long flags;
	struct amd_svm_iommu_ir *cur;

	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_for_each_entry(cur, &svm->ir_list, node) {
		if (cur->data != pi->ir_data)
			continue;
		list_del(&cur->node);
		kfree(cur);
		break;
	}
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}

static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;

	/*
	 * In some cases, the existing irte is updated and re-set,
	 * so we need to check here if it's already been added
	 * to the ir_list.
	 */
	if (pi->ir_data && (pi->prev_ga_tag != 0)) {
		struct kvm *kvm = svm->vcpu.kvm;
		u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
		struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
		struct vcpu_svm *prev_svm;

		if (!prev_vcpu) {
			ret = -EINVAL;
			goto out;
		}

		prev_svm = to_svm(prev_vcpu);
		svm_ir_list_del(prev_svm, pi);
	}

	/*
	 * Allocating a new amd_svm_iommu_ir entry, which will be added
	 * to the per-vcpu ir_list.
	 */
	ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
	if (!ir) {
		ret = -ENOMEM;
		goto out;
	}
	ir->data = pi->ir_data;

	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_add(&ir->node, &svm->ir_list);
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
	return ret;
}

/*
 * Note:
 * The HW cannot support posting multicast/broadcast
 * interrupts to a vCPU.  So, we still use legacy interrupt
 * remapping for these kinds of interrupts.
 *
 * For lowest-priority interrupts, we only support
 * those with a single CPU as the destination, e.g. the user
 * configures the interrupts via /proc/irq or uses
 * irqbalance to make the interrupts single-CPU.
 */
static int
get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
		 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
{
	struct kvm_lapic_irq irq;
	struct kvm_vcpu *vcpu = NULL;

	kvm_set_msi_irq(kvm, e, &irq);

	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
	    !kvm_irq_is_postable(&irq)) {
		pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
			 __func__, irq.vector);
		return -1;
	}

	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
		 irq.vector);
	*svm = to_svm(vcpu);
	vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
	vcpu_info->vector = irq.vector;

	return 0;
}

/*
 * avic_pi_update_irte - set IRTE for Posted-Interrupts
 *
 * @kvm: kvm
 * @host_irq: host irq of the interrupt
 * @guest_irq: gsi of the interrupt
 * @set: set or unset PI
 * returns 0 on success, < 0 on failure
 */
int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
			uint32_t guest_irq, bool set)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_irq_routing_table *irq_rt;
	int idx, ret = 0;

	if (!kvm_arch_has_assigned_device(kvm) ||
	    !irq_remapping_cap(IRQ_POSTING_CAP))
		return 0;

	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
		 __func__, host_irq, guest_irq, set);

	idx = srcu_read_lock(&kvm->irq_srcu);
	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);

	if (guest_irq >= irq_rt->nr_rt_entries ||
	    hlist_empty(&irq_rt->map[guest_irq])) {
		pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
			     guest_irq, irq_rt->nr_rt_entries);
		goto out;
	}

	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
		struct vcpu_data vcpu_info;
		struct vcpu_svm *svm = NULL;

		if (e->type != KVM_IRQ_ROUTING_MSI)
			continue;

		/*
		 * Here, we set up legacy mode in the following cases:
		 * 1. The interrupt cannot be targeted at a specific vCPU.
		 * 2. The posted interrupt is being unset.
		 * 3. APIC virtualization is disabled for the vCPU.
		 * 4. The IRQ has an incompatible delivery mode (SMI, INIT, etc.)
		 */
		if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
		    kvm_vcpu_apicv_active(&svm->vcpu)) {
			struct amd_iommu_pi_data pi;

			/* Try to enable guest_mode in IRTE */
			pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
					    AVIC_HPA_MASK);
			pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
					       svm->vcpu.vcpu_id);
			pi.is_guest_mode = true;
			pi.vcpu_data = &vcpu_info;
			ret = irq_set_vcpu_affinity(host_irq, &pi);

			/*
			 * Here, we have successfully set up vcpu affinity in
			 * IOMMU guest mode.  Now, we need to store the posted
			 * interrupt information in a per-vcpu ir_list so that
			 * we can reference it directly when we update vcpu
			 * scheduling information in the IOMMU irte.
			 */
			if (!ret && pi.is_guest_mode)
				svm_ir_list_add(svm, &pi);
		} else {
			/* Use legacy mode in IRTE */
			struct amd_iommu_pi_data pi;

			/*
			 * Here, pi is used to:
			 * - Tell the IOMMU to use legacy mode for this interrupt.
			 * - Retrieve ga_tag of the prior interrupt remapping data.
			 */
			pi.prev_ga_tag = 0;
			pi.is_guest_mode = false;
			ret = irq_set_vcpu_affinity(host_irq, &pi);

			/*
			 * Check if the posted interrupt was previously
			 * set up with guest_mode by checking if the ga_tag
			 * was cached.  If so, we need to clean up the per-vcpu
			 * ir_list.
			 */
			if (!ret && pi.prev_ga_tag) {
				int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
				struct kvm_vcpu *vcpu;

				vcpu = kvm_get_vcpu_by_id(kvm, id);
				if (vcpu)
					svm_ir_list_del(to_svm(vcpu), &pi);
			}
		}

		if (!ret && svm) {
			trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
						 e->gsi, vcpu_info.vector,
						 vcpu_info.pi_desc_addr, set);
		}

		if (ret < 0) {
			pr_err("%s: failed to update PI IRTE\n", __func__);
			goto out;
		}
	}

	ret = 0;
out:
	srcu_read_unlock(&kvm->irq_srcu, idx);
	return ret;
}

bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
{
	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
			  BIT(APICV_INHIBIT_REASON_ABSENT) |
			  BIT(APICV_INHIBIT_REASON_HYPERV) |
			  BIT(APICV_INHIBIT_REASON_NESTED) |
			  BIT(APICV_INHIBIT_REASON_IRQWIN) |
			  BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
			  BIT(APICV_INHIBIT_REASON_SEV) |
			  BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) |
			  BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
			  BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED) |
			  BIT(APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);

	return supported & BIT(reason);
}
static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

	/*
	 * Here, we go through the per-vcpu ir_list to update all existing
	 * interrupt remapping table entries targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	if (list_empty(&svm->ir_list))
		goto out;

	list_for_each_entry(ir, &svm->ir_list, node) {
		ret = amd_iommu_update_ga(cpu, r, ir->data);
		if (ret)
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	return ret;
}

void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	u64 entry;
	int h_physical_id = kvm_cpu_get_apicid(cpu);
	struct vcpu_svm *svm = to_svm(vcpu);

	lockdep_assert_preemption_disabled();

	if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
		return;

	/*
	 * No need to update anything if the vCPU is blocking, i.e. if the vCPU
	 * is being scheduled in after being preempted.  The CPU entries in the
	 * Physical APIC table and IRTE are consumed iff IsRun{ning} is '1'.
	 * If the vCPU was migrated, its new CPU value will be stuffed when the
	 * vCPU unblocks.
	 */
	if (kvm_vcpu_is_blocking(vcpu))
		return;

	entry = READ_ONCE(*(svm->avic_physical_id_cache));

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
	entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;

	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
}

void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
	u64 entry;
	struct vcpu_svm *svm = to_svm(vcpu);

	lockdep_assert_preemption_disabled();

	entry = READ_ONCE(*(svm->avic_physical_id_cache));

	/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
		return;

	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}

void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb01.ptr;

	if (!lapic_in_kernel(vcpu) || !enable_apicv)
		return;

	if (kvm_vcpu_apicv_active(vcpu)) {
		/*
		 * During AVIC temporary deactivation, the guest could update
		 * the APIC ID, DFR and LDR registers, which would not be
		 * trapped by avic_unaccelerated_access_interception().  In
		 * this case, we need to check and update the AVIC logical
		 * APIC ID table accordingly before re-activating.
		 */
		avic_apicv_post_state_restore(vcpu);
		avic_activate_vmcb(svm);
	} else {
		avic_deactivate_vmcb(svm);
	}
	vmcb_mark_dirty(vmcb, VMCB_AVIC);
}

void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
	bool activated = kvm_vcpu_apicv_active(vcpu);

	if (!enable_apicv)
		return;

	avic_refresh_virtual_apic_mode(vcpu);

	if (activated)
		avic_vcpu_load(vcpu, vcpu->cpu);
	else
		avic_vcpu_put(vcpu);

	avic_set_pi_irte_mode(vcpu, activated);
}
ef0f6496 | 1104 | { |
935a7333 SC |
1105 | if (!kvm_vcpu_apicv_active(vcpu)) |
1106 | return; | |
1107 | ||
935a7333 SC |
1108 | /* |
1109 | * Unload the AVIC when the vCPU is about to block, _before_ | |
1110 | * the vCPU actually blocks. | |
1111 | * | |
1112 | * Any IRQs that arrive before IsRunning=0 will not cause an | |
1113 | * incomplete IPI vmexit on the source, therefore vIRR will also | |
1114 | * be checked by kvm_vcpu_check_block() before blocking. The | |
1115 | * memory barrier implicit in set_current_state orders writing | |
1116 | * IsRunning=0 before reading the vIRR. The processor needs a | |
1117 | * matching memory barrier on interrupt delivery between writing | |
1118 | * IRR and reading IsRunning; the lack of this barrier might be | |
1119 | * the cause of errata #1235). | |
1120 | */ | |
1121 | avic_vcpu_put(vcpu); | |
ef0f6496 JR |
1122 | } |
1123 | ||
a3c19d5b | 1124 | void avic_vcpu_unblocking(struct kvm_vcpu *vcpu) |
ef0f6496 | 1125 | { |
935a7333 SC |
1126 | if (!kvm_vcpu_apicv_active(vcpu)) |
1127 | return; | |
1128 | ||
ba8ec273 | 1129 | avic_vcpu_load(vcpu, vcpu->cpu); |
ef0f6496 | 1130 | } |
4bdec12a SS |
1131 | |

/*
 * Note:
 * - The module param avic enables both xAPIC and x2APIC mode.
 * - The hypervisor can support both xAVIC and x2AVIC in the same guest.
 * - The mode can be switched at run-time.
 */
bool avic_hardware_setup(struct kvm_x86_ops *x86_ops)
{
	if (!npt_enabled)
		return false;

	/* AVIC is a prerequisite for x2AVIC. */
	if (!boot_cpu_has(X86_FEATURE_AVIC) && !force_avic) {
		if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
			pr_warn(FW_BUG "Cannot support x2AVIC because AVIC is disabled");
			pr_warn(FW_BUG "Try enabling AVIC using the force_avic option");
		}
		return false;
	}

	if (boot_cpu_has(X86_FEATURE_AVIC)) {
		pr_info("AVIC enabled\n");
	} else if (force_avic) {
		/*
		 * Some older systems do not advertise AVIC support.
		 * See the Revision Guide for the specific AMD processor
		 * for more detail.
		 */
		pr_warn("AVIC is not supported in CPUID but force enabled");
		pr_warn("Your system might crash and burn");
	}

	/* AVIC is a prerequisite for x2AVIC. */
	x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
	if (x2avic_enabled)
		pr_info("x2AVIC enabled\n");

	amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);

	return true;
}