Commit | Line | Data |
---|---|---|
7a338472 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
783e9e51 | 2 | /* |
cc68765d | 3 | * tools/testing/selftests/kvm/lib/x86_64/processor.c |
783e9e51 PB |
4 | * |
5 | * Copyright (C) 2018, Google LLC. | |
783e9e51 PB |
6 | */ |
7 | ||
783e9e51 PB |
8 | #include "test_util.h" |
9 | #include "kvm_util.h" | |
cc68765d | 10 | #include "processor.h" |
783e9e51 | 11 | |
29faeb96 AL |
12 | #ifndef NUM_INTERRUPTS |
13 | #define NUM_INTERRUPTS 256 | |
14 | #endif | |
15 | ||
16 | #define DEFAULT_CODE_SELECTOR 0x8 | |
17 | #define DEFAULT_DATA_SELECTOR 0x10 | |
18 | ||
fc66963d SC |
19 | #define MAX_NR_CPUID_ENTRIES 100 |
20 | ||
29faeb96 | 21 | vm_vaddr_t exception_handlers; |
e6df2ae3 VA |
22 | bool host_cpu_is_amd; |
23 | bool host_cpu_is_intel; | |
29faeb96 | 24 | |
b938cafd | 25 | static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) |
783e9e51 PB |
26 | { |
27 | fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " | |
28 | "rcx: 0x%.16llx rdx: 0x%.16llx\n", | |
29 | indent, "", | |
30 | regs->rax, regs->rbx, regs->rcx, regs->rdx); | |
31 | fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx " | |
32 | "rsp: 0x%.16llx rbp: 0x%.16llx\n", | |
33 | indent, "", | |
34 | regs->rsi, regs->rdi, regs->rsp, regs->rbp); | |
35 | fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx " | |
36 | "r10: 0x%.16llx r11: 0x%.16llx\n", | |
37 | indent, "", | |
38 | regs->r8, regs->r9, regs->r10, regs->r11); | |
39 | fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx " | |
40 | "r14: 0x%.16llx r15: 0x%.16llx\n", | |
41 | indent, "", | |
42 | regs->r12, regs->r13, regs->r14, regs->r15); | |
43 | fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n", | |
44 | indent, "", | |
45 | regs->rip, regs->rflags); | |
46 | } | |
47 | ||
783e9e51 PB |
48 | static void segment_dump(FILE *stream, struct kvm_segment *segment, |
49 | uint8_t indent) | |
50 | { | |
51 | fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " | |
52 | "selector: 0x%.4x type: 0x%.2x\n", | |
53 | indent, "", segment->base, segment->limit, | |
54 | segment->selector, segment->type); | |
55 | fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x " | |
56 | "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n", | |
57 | indent, "", segment->present, segment->dpl, | |
58 | segment->db, segment->s, segment->l); | |
59 | fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x " | |
60 | "unusable: 0x%.2x padding: 0x%.2x\n", | |
61 | indent, "", segment->g, segment->avl, | |
62 | segment->unusable, segment->padding); | |
63 | } | |
64 | ||
783e9e51 PB |
65 | static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, |
66 | uint8_t indent) | |
67 | { | |
68 | fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " | |
69 | "padding: 0x%.4x 0x%.4x 0x%.4x\n", | |
70 | indent, "", dtable->base, dtable->limit, | |
71 | dtable->padding[0], dtable->padding[1], dtable->padding[2]); | |
72 | } | |
73 | ||
b938cafd | 74 | static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent) |
783e9e51 PB |
75 | { |
76 | unsigned int i; | |
77 | ||
78 | fprintf(stream, "%*scs:\n", indent, ""); | |
79 | segment_dump(stream, &sregs->cs, indent + 2); | |
80 | fprintf(stream, "%*sds:\n", indent, ""); | |
81 | segment_dump(stream, &sregs->ds, indent + 2); | |
82 | fprintf(stream, "%*ses:\n", indent, ""); | |
83 | segment_dump(stream, &sregs->es, indent + 2); | |
84 | fprintf(stream, "%*sfs:\n", indent, ""); | |
85 | segment_dump(stream, &sregs->fs, indent + 2); | |
86 | fprintf(stream, "%*sgs:\n", indent, ""); | |
87 | segment_dump(stream, &sregs->gs, indent + 2); | |
88 | fprintf(stream, "%*sss:\n", indent, ""); | |
89 | segment_dump(stream, &sregs->ss, indent + 2); | |
90 | fprintf(stream, "%*str:\n", indent, ""); | |
91 | segment_dump(stream, &sregs->tr, indent + 2); | |
92 | fprintf(stream, "%*sldt:\n", indent, ""); | |
93 | segment_dump(stream, &sregs->ldt, indent + 2); | |
94 | ||
95 | fprintf(stream, "%*sgdt:\n", indent, ""); | |
96 | dtable_dump(stream, &sregs->gdt, indent + 2); | |
97 | fprintf(stream, "%*sidt:\n", indent, ""); | |
98 | dtable_dump(stream, &sregs->idt, indent + 2); | |
99 | ||
100 | fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx " | |
101 | "cr3: 0x%.16llx cr4: 0x%.16llx\n", | |
102 | indent, "", | |
103 | sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4); | |
104 | fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx " | |
105 | "apic_base: 0x%.16llx\n", | |
106 | indent, "", | |
107 | sregs->cr8, sregs->efer, sregs->apic_base); | |
108 | ||
109 | fprintf(stream, "%*sinterrupt_bitmap:\n", indent, ""); | |
110 | for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) { | |
111 | fprintf(stream, "%*s%.16llx\n", indent + 2, "", | |
112 | sregs->interrupt_bitmap[i]); | |
113 | } | |
114 | } | |
115 | ||
458e9874 DM |
116 | bool kvm_is_tdp_enabled(void) |
117 | { | |
e6df2ae3 | 118 | if (host_cpu_is_intel) |
458e9874 DM |
119 | return get_kvm_intel_param_bool("ept"); |
120 | else | |
121 | return get_kvm_amd_param_bool("npt"); | |
122 | } | |
123 | ||
9931be3f | 124 | void virt_arch_pgd_alloc(struct kvm_vm *vm) |
783e9e51 | 125 | { |
567a9f1e | 126 | TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " |
783e9e51 PB |
127 | "unknown or unsupported guest mode, mode: 0x%x", vm->mode); |
128 | ||
129 | /* If needed, create page map l4 table. */ | |
130 | if (!vm->pgd_created) { | |
cce0c23d | 131 | vm->pgd = vm_alloc_page_table(vm); |
783e9e51 PB |
132 | vm->pgd_created = true; |
133 | } | |
134 | } | |
135 | ||
ed0b58fc SC |
136 | static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, |
137 | uint64_t vaddr, int level) | |
f681d686 | 138 | { |
ed0b58fc | 139 | uint64_t pt_gpa = PTE_GET_PA(*parent_pte); |
91add12d | 140 | uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); |
4ee602e7 | 141 | int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; |
f681d686 | 142 | |
ed0b58fc SC |
143 | TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd, |
144 | "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", | |
145 | level + 1, vaddr); | |
146 | ||
f681d686 SC |
147 | return &page_table[index]; |
148 | } | |
149 | ||
f18b4aeb | 150 | static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, |
ed0b58fc | 151 | uint64_t *parent_pte, |
f18b4aeb PB |
152 | uint64_t vaddr, |
153 | uint64_t paddr, | |
4ee602e7 DM |
154 | int current_level, |
155 | int target_level) | |
b007e904 | 156 | { |
ed0b58fc | 157 | uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); |
f18b4aeb PB |
158 | |
159 | if (!(*pte & PTE_PRESENT_MASK)) { | |
160 | *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; | |
4ee602e7 | 161 | if (current_level == target_level) |
f18b4aeb | 162 | *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); |
ad5f16e4 | 163 | else |
f18b4aeb | 164 | *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; |
ad5f16e4 SC |
165 | } else { |
166 | /* | |
167 | * Entry already present. Assert that the caller doesn't want | |
168 | * a hugepage at this level, and that there isn't a hugepage at | |
169 | * this level. | |
170 | */ | |
4ee602e7 | 171 | TEST_ASSERT(current_level != target_level, |
ad5f16e4 | 172 | "Cannot create hugepage at level: %u, vaddr: 0x%lx\n", |
4ee602e7 | 173 | current_level, vaddr); |
f18b4aeb | 174 | TEST_ASSERT(!(*pte & PTE_LARGE_MASK), |
ad5f16e4 | 175 | "Cannot create page table at level: %u, vaddr: 0x%lx\n", |
4ee602e7 | 176 | current_level, vaddr); |
b007e904 SC |
177 | } |
178 | return pte; | |
179 | } | |
180 | ||
4ee602e7 | 181 | void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) |
783e9e51 | 182 | { |
4ee602e7 | 183 | const uint64_t pg_size = PG_LEVEL_SIZE(level); |
f18b4aeb PB |
184 | uint64_t *pml4e, *pdpe, *pde; |
185 | uint64_t *pte; | |
783e9e51 | 186 | |
ad5f16e4 SC |
187 | TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, |
188 | "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); | |
783e9e51 | 189 | |
ad5f16e4 SC |
190 | TEST_ASSERT((vaddr % pg_size) == 0, |
191 | "Virtual address not aligned,\n" | |
192 | "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size); | |
193 | TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), | |
194 | "Invalid virtual address, vaddr: 0x%lx", vaddr); | |
195 | TEST_ASSERT((paddr % pg_size) == 0, | |
196 | "Physical address not aligned,\n" | |
197 | " paddr: 0x%lx page size: 0x%lx", paddr, pg_size); | |
783e9e51 | 198 | TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, |
ad5f16e4 SC |
199 | "Physical address beyond maximum supported,\n" |
200 | " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", | |
201 | paddr, vm->max_gfn, vm->page_size); | |
202 | ||
203 | /* | |
204 | * Allocate upper level page tables, if not already present. Return | |
205 | * early if a hugepage was created. | |
206 | */ | |
ed0b58fc | 207 | pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); |
f18b4aeb | 208 | if (*pml4e & PTE_LARGE_MASK) |
ad5f16e4 SC |
209 | return; |
210 | ||
ed0b58fc | 211 | pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); |
f18b4aeb | 212 | if (*pdpe & PTE_LARGE_MASK) |
ad5f16e4 | 213 | return; |
783e9e51 | 214 | |
ed0b58fc | 215 | pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); |
f18b4aeb | 216 | if (*pde & PTE_LARGE_MASK) |
ad5f16e4 | 217 | return; |
783e9e51 PB |
218 | |
219 | /* Fill in page table entry. */ | |
ed0b58fc | 220 | pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); |
f18b4aeb | 221 | TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), |
ad5f16e4 | 222 | "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); |
f18b4aeb | 223 | *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); |
783e9e51 PB |
224 | } |
225 | ||
9931be3f | 226 | void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) |
ad5f16e4 | 227 | { |
4ee602e7 | 228 | __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); |
ad5f16e4 SC |
229 | } |
230 | ||
458e9874 DM |
/*
 * Map @nr_bytes of guest virtual memory starting at @vaddr onto guest
 * physical memory starting at @paddr, using pages of size
 * PG_LEVEL_SIZE(@level).  @nr_bytes must be a multiple of that page size.
 */
void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
		    uint64_t nr_bytes, int level)
{
	uint64_t pg_size = PG_LEVEL_SIZE(level);
	uint64_t nr_pages = nr_bytes / pg_size;
	uint64_t i;

	TEST_ASSERT(nr_bytes % pg_size == 0,
		    "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx",
		    nr_bytes, pg_size);

	/*
	 * The counter has the same unsigned 64-bit type as nr_pages so the
	 * comparison is not mixed-sign and cannot overflow for large regions.
	 */
	for (i = 0; i < nr_pages; i++) {
		__virt_pg_map(vm, vaddr, paddr, level);

		vaddr += pg_size;
		paddr += pg_size;
	}
}
249 | ||
96b69958 SC |
250 | static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) |
251 | { | |
252 | if (*pte & PTE_LARGE_MASK) { | |
253 | TEST_ASSERT(*level == PG_LEVEL_NONE || | |
254 | *level == current_level, | |
255 | "Unexpected hugepage at level %d\n", current_level); | |
256 | *level = current_level; | |
257 | } | |
258 | ||
259 | return *level == current_level; | |
260 | } | |
261 | ||
262 | uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, | |
263 | int *level) | |
39bbcc3a | 264 | { |
f18b4aeb | 265 | uint64_t *pml4e, *pdpe, *pde; |
39bbcc3a | 266 | |
96b69958 SC |
267 | TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, |
268 | "Invalid PG_LEVEL_* '%d'", *level); | |
269 | ||
39bbcc3a AL |
270 | TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " |
271 | "unknown or unsupported guest mode, mode: 0x%x", vm->mode); | |
272 | TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, | |
273 | (vaddr >> vm->page_shift)), | |
274 | "Invalid virtual address, vaddr: 0x%lx", | |
275 | vaddr); | |
276 | /* | |
277 | * Based on the mode check above there are 48 bits in the vaddr, so | |
278 | * shift 16 to sign extend the last bit (bit-47), | |
279 | */ | |
280 | TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), | |
281 | "Canonical check failed. The virtual address is invalid."); | |
282 | ||
99d51c6e | 283 | pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); |
96b69958 SC |
284 | if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) |
285 | return pml4e; | |
39bbcc3a | 286 | |
99d51c6e | 287 | pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); |
96b69958 SC |
288 | if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) |
289 | return pdpe; | |
39bbcc3a | 290 | |
99d51c6e | 291 | pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); |
96b69958 SC |
292 | if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) |
293 | return pde; | |
39bbcc3a | 294 | |
99d51c6e | 295 | return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); |
39bbcc3a AL |
296 | } |
297 | ||
96b69958 SC |
298 | uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) |
299 | { | |
300 | int level = PG_LEVEL_4K; | |
301 | ||
302 | return __vm_get_page_table_entry(vm, vaddr, &level); | |
303 | } | |
304 | ||
9931be3f | 305 | void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) |
783e9e51 | 306 | { |
f18b4aeb PB |
307 | uint64_t *pml4e, *pml4e_start; |
308 | uint64_t *pdpe, *pdpe_start; | |
309 | uint64_t *pde, *pde_start; | |
310 | uint64_t *pte, *pte_start; | |
783e9e51 PB |
311 | |
312 | if (!vm->pgd_created) | |
313 | return; | |
314 | ||
315 | fprintf(stream, "%*s " | |
316 | " no\n", indent, ""); | |
317 | fprintf(stream, "%*s index hvaddr gpaddr " | |
318 | "addr w exec dirty\n", | |
319 | indent, ""); | |
f18b4aeb | 320 | pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd); |
783e9e51 PB |
321 | for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { |
322 | pml4e = &pml4e_start[n1]; | |
f18b4aeb | 323 | if (!(*pml4e & PTE_PRESENT_MASK)) |
783e9e51 | 324 | continue; |
f18b4aeb | 325 | fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u " |
783e9e51 PB |
326 | " %u\n", |
327 | indent, "", | |
328 | pml4e - pml4e_start, pml4e, | |
f18b4aeb PB |
329 | addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e), |
330 | !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK)); | |
783e9e51 | 331 | |
f18b4aeb | 332 | pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK); |
783e9e51 PB |
333 | for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { |
334 | pdpe = &pdpe_start[n2]; | |
f18b4aeb | 335 | if (!(*pdpe & PTE_PRESENT_MASK)) |
783e9e51 | 336 | continue; |
f18b4aeb | 337 | fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx " |
783e9e51 PB |
338 | "%u %u\n", |
339 | indent, "", | |
340 | pdpe - pdpe_start, pdpe, | |
341 | addr_hva2gpa(vm, pdpe), | |
f18b4aeb PB |
342 | PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK), |
343 | !!(*pdpe & PTE_NX_MASK)); | |
783e9e51 | 344 | |
f18b4aeb | 345 | pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK); |
783e9e51 PB |
346 | for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { |
347 | pde = &pde_start[n3]; | |
f18b4aeb | 348 | if (!(*pde & PTE_PRESENT_MASK)) |
783e9e51 PB |
349 | continue; |
350 | fprintf(stream, "%*spde 0x%-3zx %p " | |
f18b4aeb | 351 | "0x%-12lx 0x%-10llx %u %u\n", |
783e9e51 PB |
352 | indent, "", pde - pde_start, pde, |
353 | addr_hva2gpa(vm, pde), | |
f18b4aeb PB |
354 | PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK), |
355 | !!(*pde & PTE_NX_MASK)); | |
783e9e51 | 356 | |
f18b4aeb | 357 | pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK); |
783e9e51 PB |
358 | for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { |
359 | pte = &pte_start[n4]; | |
f18b4aeb | 360 | if (!(*pte & PTE_PRESENT_MASK)) |
783e9e51 PB |
361 | continue; |
362 | fprintf(stream, "%*spte 0x%-3zx %p " | |
f18b4aeb | 363 | "0x%-12lx 0x%-10llx %u %u " |
783e9e51 PB |
364 | " %u 0x%-10lx\n", |
365 | indent, "", | |
366 | pte - pte_start, pte, | |
367 | addr_hva2gpa(vm, pte), | |
f18b4aeb PB |
368 | PTE_GET_PFN(*pte), |
369 | !!(*pte & PTE_WRITABLE_MASK), | |
370 | !!(*pte & PTE_NX_MASK), | |
371 | !!(*pte & PTE_DIRTY_MASK), | |
783e9e51 PB |
372 | ((uint64_t) n1 << 27) |
373 | | ((uint64_t) n2 << 18) | |
374 | | ((uint64_t) n3 << 9) | |
375 | | ((uint64_t) n4)); | |
376 | } | |
377 | } | |
378 | } | |
379 | } | |
380 | } | |
381 | ||
42593624 AJ |
382 | /* |
383 | * Set Unusable Segment | |
783e9e51 PB |
384 | * |
385 | * Input Args: None | |
386 | * | |
387 | * Output Args: | |
388 | * segp - Pointer to segment register | |
389 | * | |
390 | * Return: None | |
391 | * | |
42593624 | 392 | * Sets the segment register pointed to by @segp to an unusable state. |
783e9e51 PB |
393 | */ |
394 | static void kvm_seg_set_unusable(struct kvm_segment *segp) | |
395 | { | |
396 | memset(segp, 0, sizeof(*segp)); | |
397 | segp->unusable = true; | |
398 | } | |
399 | ||
2305339e PB |
400 | static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) |
401 | { | |
402 | void *gdt = addr_gva2hva(vm, vm->gdt); | |
403 | struct desc64 *desc = gdt + (segp->selector >> 3) * 8; | |
404 | ||
405 | desc->limit0 = segp->limit & 0xFFFF; | |
406 | desc->base0 = segp->base & 0xFFFF; | |
407 | desc->base1 = segp->base >> 16; | |
2305339e | 408 | desc->type = segp->type; |
df11f7dd | 409 | desc->s = segp->s; |
2305339e PB |
410 | desc->dpl = segp->dpl; |
411 | desc->p = segp->present; | |
412 | desc->limit1 = segp->limit >> 16; | |
df11f7dd | 413 | desc->avl = segp->avl; |
2305339e PB |
414 | desc->l = segp->l; |
415 | desc->db = segp->db; | |
416 | desc->g = segp->g; | |
417 | desc->base2 = segp->base >> 24; | |
418 | if (!segp->s) | |
419 | desc->base3 = segp->base >> 32; | |
420 | } | |
421 | ||
422 | ||
42593624 AJ |
423 | /* |
424 | * Set Long Mode Flat Kernel Code Segment | |
783e9e51 PB |
425 | * |
426 | * Input Args: | |
2305339e | 427 | * vm - VM whose GDT is being filled, or NULL to only write segp |
783e9e51 PB |
428 | * selector - selector value |
429 | * | |
430 | * Output Args: | |
431 | * segp - Pointer to KVM segment | |
432 | * | |
433 | * Return: None | |
434 | * | |
42593624 AJ |
435 | * Sets up the KVM segment pointed to by @segp, to be a code segment |
436 | * with the selector value given by @selector. | |
783e9e51 | 437 | */ |
2305339e | 438 | static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, |
783e9e51 PB |
439 | struct kvm_segment *segp) |
440 | { | |
441 | memset(segp, 0, sizeof(*segp)); | |
442 | segp->selector = selector; | |
443 | segp->limit = 0xFFFFFFFFu; | |
444 | segp->s = 0x1; /* kTypeCodeData */ | |
445 | segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed | |
446 | * | kFlagCodeReadable | |
447 | */ | |
448 | segp->g = true; | |
449 | segp->l = true; | |
450 | segp->present = 1; | |
2305339e PB |
451 | if (vm) |
452 | kvm_seg_fill_gdt_64bit(vm, segp); | |
783e9e51 PB |
453 | } |
454 | ||
42593624 AJ |
455 | /* |
456 | * Set Long Mode Flat Kernel Data Segment | |
783e9e51 PB |
457 | * |
458 | * Input Args: | |
2305339e | 459 | * vm - VM whose GDT is being filled, or NULL to only write segp |
783e9e51 PB |
460 | * selector - selector value |
461 | * | |
462 | * Output Args: | |
463 | * segp - Pointer to KVM segment | |
464 | * | |
465 | * Return: None | |
466 | * | |
42593624 AJ |
467 | * Sets up the KVM segment pointed to by @segp, to be a data segment |
468 | * with the selector value given by @selector. | |
783e9e51 | 469 | */ |
2305339e | 470 | static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, |
783e9e51 PB |
471 | struct kvm_segment *segp) |
472 | { | |
473 | memset(segp, 0, sizeof(*segp)); | |
474 | segp->selector = selector; | |
475 | segp->limit = 0xFFFFFFFFu; | |
476 | segp->s = 0x1; /* kTypeCodeData */ | |
477 | segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed | |
478 | * | kFlagDataWritable | |
479 | */ | |
480 | segp->g = true; | |
481 | segp->present = true; | |
2305339e PB |
482 | if (vm) |
483 | kvm_seg_fill_gdt_64bit(vm, segp); | |
783e9e51 PB |
484 | } |
485 | ||
9931be3f | 486 | vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) |
783e9e51 | 487 | { |
96b69958 SC |
488 | int level = PG_LEVEL_NONE; |
489 | uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level); | |
783e9e51 | 490 | |
efe91dc3 SC |
491 | TEST_ASSERT(*pte & PTE_PRESENT_MASK, |
492 | "Leaf PTE not PRESENT for gva: 0x%08lx", gva); | |
96b69958 SC |
493 | |
494 | /* | |
495 | * No need for a hugepage mask on the PTE, x86-64 requires the "unused" | |
496 | * address bits to be zero. | |
497 | */ | |
498 | return PTE_GET_PA(*pte) | (gva & ~HUGEPAGE_MASK(level)); | |
783e9e51 PB |
499 | } |
500 | ||
1dcd1c58 | 501 | static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt) |
2305339e PB |
502 | { |
503 | if (!vm->gdt) | |
1446e331 | 504 | vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); |
2305339e PB |
505 | |
506 | dt->base = vm->gdt; | |
507 | dt->limit = getpagesize(); | |
508 | } | |
509 | ||
510 | static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, | |
1dcd1c58 | 511 | int selector) |
2305339e PB |
512 | { |
513 | if (!vm->tss) | |
1446e331 | 514 | vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); |
2305339e PB |
515 | |
516 | memset(segp, 0, sizeof(*segp)); | |
517 | segp->base = vm->tss; | |
518 | segp->limit = 0x67; | |
519 | segp->selector = selector; | |
520 | segp->type = 0xb; | |
521 | segp->present = 1; | |
522 | kvm_seg_fill_gdt_64bit(vm, segp); | |
523 | } | |
524 | ||
768e9a61 | 525 | static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu) |
783e9e51 PB |
526 | { |
527 | struct kvm_sregs sregs; | |
528 | ||
529 | /* Set mode specific system register values. */ | |
768e9a61 | 530 | vcpu_sregs_get(vcpu, &sregs); |
783e9e51 | 531 | |
2305339e PB |
532 | sregs.idt.limit = 0; |
533 | ||
1dcd1c58 | 534 | kvm_setup_gdt(vm, &sregs.gdt); |
2305339e | 535 | |
783e9e51 | 536 | switch (vm->mode) { |
567a9f1e | 537 | case VM_MODE_PXXV48_4K: |
783e9e51 | 538 | sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; |
6c930268 | 539 | sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; |
783e9e51 PB |
540 | sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); |
541 | ||
542 | kvm_seg_set_unusable(&sregs.ldt); | |
29faeb96 AL |
543 | kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); |
544 | kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); | |
545 | kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); | |
1dcd1c58 | 546 | kvm_setup_tss_64bit(vm, &sregs.tr, 0x18); |
783e9e51 PB |
547 | break; |
548 | ||
549 | default: | |
352be2c5 | 550 | TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); |
783e9e51 | 551 | } |
783e9e51 | 552 | |
2305339e | 553 | sregs.cr3 = vm->pgd; |
768e9a61 | 554 | vcpu_sregs_set(vcpu, &sregs); |
783e9e51 | 555 | } |
42593624 | 556 | |
2115713c VA |
557 | void kvm_arch_vm_post_create(struct kvm_vm *vm) |
558 | { | |
559 | vm_create_irqchip(vm); | |
e6df2ae3 VA |
560 | sync_global_to_guest(vm, host_cpu_is_intel); |
561 | sync_global_to_guest(vm, host_cpu_is_amd); | |
2115713c VA |
562 | } |
563 | ||
1422efd6 SC |
564 | struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, |
565 | void *guest_code) | |
783e9e51 PB |
566 | { |
567 | struct kvm_mp_state mp_state; | |
568 | struct kvm_regs regs; | |
569 | vm_vaddr_t stack_vaddr; | |
1422efd6 SC |
570 | struct kvm_vcpu *vcpu; |
571 | ||
1446e331 RK |
572 | stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), |
573 | DEFAULT_GUEST_STACK_VADDR_MIN, | |
574 | MEM_REGION_DATA); | |
783e9e51 | 575 | |
f742d94f | 576 | vcpu = __vm_vcpu_add(vm, vcpu_id); |
7fbc6038 | 577 | vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); |
768e9a61 | 578 | vcpu_setup(vm, vcpu); |
783e9e51 PB |
579 | |
580 | /* Setup guest general purpose registers */ | |
768e9a61 | 581 | vcpu_regs_get(vcpu, ®s); |
783e9e51 PB |
582 | regs.rflags = regs.rflags | 0x2; |
583 | regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()); | |
584 | regs.rip = (unsigned long) guest_code; | |
768e9a61 | 585 | vcpu_regs_set(vcpu, ®s); |
783e9e51 PB |
586 | |
587 | /* Setup the MP state */ | |
588 | mp_state.mp_state = 0; | |
768e9a61 | 589 | vcpu_mp_state_set(vcpu, &mp_state); |
1422efd6 SC |
590 | |
591 | return vcpu; | |
783e9e51 PB |
592 | } |
593 | ||
4c16fa3e SC |
/*
 * Add vCPU @vcpu_id to @vm again and give it the CPUID returned by
 * kvm_get_supported_cpuid().  Unlike vm_arch_vcpu_add(), no stack is
 * allocated and no registers are programmed.
 */
struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *recreated;

	recreated = __vm_vcpu_add(vm, vcpu_id);
	vcpu_init_cpuid(recreated, kvm_get_supported_cpuid());

	return recreated;
}
602 | ||
7fbc6038 SC |
603 | void vcpu_arch_free(struct kvm_vcpu *vcpu) |
604 | { | |
605 | if (vcpu->cpuid) | |
606 | free(vcpu->cpuid); | |
607 | } | |
608 | ||
cd5f3d21 SC |
609 | /* Do not use kvm_supported_cpuid directly except for validity checks. */ |
610 | static void *kvm_supported_cpuid; | |
611 | ||
813e38cd | 612 | const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) |
eabe7881 | 613 | { |
eabe7881 AJ |
614 | int kvm_fd; |
615 | ||
cd5f3d21 SC |
616 | if (kvm_supported_cpuid) |
617 | return kvm_supported_cpuid; | |
eabe7881 | 618 | |
cd5f3d21 | 619 | kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); |
2aab4b35 | 620 | kvm_fd = open_kvm_dev_path_or_exit(); |
eabe7881 | 621 | |
cd5f3d21 SC |
622 | kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, |
623 | (struct kvm_cpuid2 *)kvm_supported_cpuid); | |
eabe7881 AJ |
624 | |
625 | close(kvm_fd); | |
cd5f3d21 | 626 | return kvm_supported_cpuid; |
eabe7881 AJ |
627 | } |
628 | ||
a29e6e38 SC |
629 | static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, |
630 | uint32_t function, uint32_t index, | |
631 | uint8_t reg, uint8_t lo, uint8_t hi) | |
61d76b8a SC |
632 | { |
633 | const struct kvm_cpuid_entry2 *entry; | |
634 | int i; | |
635 | ||
636 | for (i = 0; i < cpuid->nent; i++) { | |
637 | entry = &cpuid->entries[i]; | |
638 | ||
639 | /* | |
640 | * The output registers in kvm_cpuid_entry2 are in alphabetical | |
641 | * order, but kvm_x86_cpu_feature matches that mess, so yay | |
642 | * pointer shenanigans! | |
643 | */ | |
a29e6e38 SC |
644 | if (entry->function == function && entry->index == index) |
645 | return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo; | |
61d76b8a SC |
646 | } |
647 | ||
a29e6e38 SC |
648 | return 0; |
649 | } | |
650 | ||
651 | bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, | |
652 | struct kvm_x86_cpu_feature feature) | |
653 | { | |
654 | return __kvm_cpu_has(cpuid, feature.function, feature.index, | |
655 | feature.reg, feature.bit, feature.bit); | |
61d76b8a SC |
656 | } |
657 | ||
40854713 SC |
658 | uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, |
659 | struct kvm_x86_cpu_property property) | |
660 | { | |
661 | return __kvm_cpu_has(cpuid, property.function, property.index, | |
662 | property.reg, property.lo_bit, property.hi_bit); | |
663 | } | |
664 | ||
f88d4f2f LX |
665 | uint64_t kvm_get_feature_msr(uint64_t msr_index) |
666 | { | |
667 | struct { | |
668 | struct kvm_msrs header; | |
669 | struct kvm_msr_entry entry; | |
670 | } buffer = {}; | |
671 | int r, kvm_fd; | |
672 | ||
673 | buffer.header.nmsrs = 1; | |
674 | buffer.entry.index = msr_index; | |
2aab4b35 | 675 | kvm_fd = open_kvm_dev_path_or_exit(); |
f88d4f2f | 676 | |
f9725f89 SC |
677 | r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header); |
678 | TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r)); | |
f88d4f2f LX |
679 | |
680 | close(kvm_fd); | |
681 | return buffer.entry.data; | |
682 | } | |
683 | ||
2ceade1d SC |
684 | void __vm_xsave_require_permission(int bit, const char *name) |
685 | { | |
686 | int kvm_fd; | |
687 | u64 bitmask; | |
688 | long rc; | |
689 | struct kvm_device_attr attr = { | |
690 | .group = 0, | |
691 | .attr = KVM_X86_XCOMP_GUEST_SUPP, | |
692 | .addr = (unsigned long) &bitmask | |
693 | }; | |
694 | ||
cd5f3d21 SC |
695 | TEST_ASSERT(!kvm_supported_cpuid, |
696 | "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM"); | |
697 | ||
2ceade1d SC |
698 | kvm_fd = open_kvm_dev_path_or_exit(); |
699 | rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); | |
700 | close(kvm_fd); | |
701 | ||
702 | if (rc == -1 && (errno == ENXIO || errno == EINVAL)) | |
703 | __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported"); | |
704 | ||
705 | TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); | |
706 | ||
707 | __TEST_REQUIRE(bitmask & (1ULL << bit), | |
708 | "Required XSAVE feature '%s' not supported", name); | |
709 | ||
710 | TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit)); | |
711 | ||
712 | rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); | |
713 | TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); | |
714 | TEST_ASSERT(bitmask & (1ULL << bit), | |
715 | "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", | |
716 | bitmask); | |
717 | } | |
718 | ||
813e38cd | 719 | void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid) |
fb18d053 | 720 | { |
7fbc6038 | 721 | TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID"); |
fb18d053 | 722 | |
7fbc6038 SC |
723 | /* Allow overriding the default CPUID. */ |
724 | if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) { | |
725 | free(vcpu->cpuid); | |
726 | vcpu->cpuid = NULL; | |
fb18d053 VK |
727 | } |
728 | ||
7fbc6038 SC |
729 | if (!vcpu->cpuid) |
730 | vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent); | |
fb18d053 | 731 | |
7fbc6038 SC |
732 | memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent)); |
733 | vcpu_set_cpuid(vcpu); | |
734 | } | |
fb18d053 | 735 | |
1940af0b SC |
736 | void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr) |
737 | { | |
738 | struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, 0x80000008); | |
739 | ||
740 | entry->eax = (entry->eax & ~0xff) | maxphyaddr; | |
741 | vcpu_set_cpuid(vcpu); | |
742 | } | |
743 | ||
3a5d36b3 SC |
744 | void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function) |
745 | { | |
746 | struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function); | |
747 | ||
748 | entry->eax = 0; | |
749 | entry->ebx = 0; | |
750 | entry->ecx = 0; | |
751 | entry->edx = 0; | |
752 | vcpu_set_cpuid(vcpu); | |
753 | } | |
754 | ||
c41880b5 SC |
755 | void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, |
756 | struct kvm_x86_cpu_feature feature, | |
757 | bool set) | |
758 | { | |
759 | struct kvm_cpuid_entry2 *entry; | |
760 | u32 *reg; | |
761 | ||
762 | entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index); | |
763 | reg = (&entry->eax) + feature.reg; | |
764 | ||
765 | if (set) | |
766 | *reg |= BIT(feature.bit); | |
767 | else | |
768 | *reg &= ~BIT(feature.bit); | |
769 | ||
770 | vcpu_set_cpuid(vcpu); | |
771 | } | |
772 | ||
768e9a61 | 773 | uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index) |
eabe7881 | 774 | { |
eabe7881 AJ |
775 | struct { |
776 | struct kvm_msrs header; | |
777 | struct kvm_msr_entry entry; | |
778 | } buffer = {}; | |
eabe7881 | 779 | |
eabe7881 AJ |
780 | buffer.header.nmsrs = 1; |
781 | buffer.entry.index = msr_index; | |
ffb7c77f | 782 | |
768e9a61 | 783 | vcpu_msrs_get(vcpu, &buffer.header); |
eabe7881 AJ |
784 | |
785 | return buffer.entry.data; | |
786 | } | |
787 | ||
768e9a61 | 788 | int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value) |
eabe7881 | 789 | { |
eabe7881 AJ |
790 | struct { |
791 | struct kvm_msrs header; | |
792 | struct kvm_msr_entry entry; | |
793 | } buffer = {}; | |
eabe7881 | 794 | |
eabe7881 AJ |
795 | memset(&buffer, 0, sizeof(buffer)); |
796 | buffer.header.nmsrs = 1; | |
797 | buffer.entry.index = msr_index; | |
798 | buffer.entry.data = msr_value; | |
c90992bf | 799 | |
768e9a61 | 800 | return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header); |
eabe7881 AJ |
801 | } |
802 | ||
768e9a61 | 803 | void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) |
eabe7881 AJ |
804 | { |
805 | va_list ap; | |
806 | struct kvm_regs regs; | |
807 | ||
808 | TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" | |
809 | " num: %u\n", | |
810 | num); | |
811 | ||
812 | va_start(ap, num); | |
768e9a61 | 813 | vcpu_regs_get(vcpu, ®s); |
eabe7881 AJ |
814 | |
815 | if (num >= 1) | |
816 | regs.rdi = va_arg(ap, uint64_t); | |
817 | ||
818 | if (num >= 2) | |
819 | regs.rsi = va_arg(ap, uint64_t); | |
820 | ||
821 | if (num >= 3) | |
822 | regs.rdx = va_arg(ap, uint64_t); | |
823 | ||
824 | if (num >= 4) | |
825 | regs.rcx = va_arg(ap, uint64_t); | |
826 | ||
827 | if (num >= 5) | |
828 | regs.r8 = va_arg(ap, uint64_t); | |
829 | ||
830 | if (num >= 6) | |
831 | regs.r9 = va_arg(ap, uint64_t); | |
832 | ||
768e9a61 | 833 | vcpu_regs_set(vcpu, ®s); |
eabe7881 AJ |
834 | va_end(ap); |
835 | } | |
836 | ||
768e9a61 | 837 | void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) |
eabe7881 AJ |
838 | { |
839 | struct kvm_regs regs; | |
840 | struct kvm_sregs sregs; | |
841 | ||
768e9a61 | 842 | fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id); |
eabe7881 AJ |
843 | |
844 | fprintf(stream, "%*sregs:\n", indent + 2, ""); | |
768e9a61 | 845 | vcpu_regs_get(vcpu, ®s); |
eabe7881 AJ |
846 | regs_dump(stream, ®s, indent + 4); |
847 | ||
848 | fprintf(stream, "%*ssregs:\n", indent + 2, ""); | |
768e9a61 | 849 | vcpu_sregs_get(vcpu, &sregs); |
eabe7881 AJ |
850 | sregs_dump(stream, &sregs, indent + 4); |
851 | } | |
852 | ||
2128e30b | 853 | static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs) |
fa3899ad | 854 | { |
2128e30b | 855 | struct kvm_msr_list *list; |
fa3899ad | 856 | struct kvm_msr_list nmsrs; |
c095cb60 SC |
857 | int kvm_fd, r; |
858 | ||
c095cb60 | 859 | kvm_fd = open_kvm_dev_path_or_exit(); |
fa3899ad PB |
860 | |
861 | nmsrs.nmsrs = 0; | |
2128e30b SC |
862 | if (!feature_msrs) |
863 | r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); | |
864 | else | |
865 | r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs); | |
866 | ||
f9725f89 | 867 | TEST_ASSERT(r == -1 && errno == E2BIG, |
c095cb60 SC |
868 | "Expected -E2BIG, got rc: %i errno: %i (%s)", |
869 | r, errno, strerror(errno)); | |
fa3899ad | 870 | |
c095cb60 SC |
871 | list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0])); |
872 | TEST_ASSERT(list, "-ENOMEM when allocating MSR index list"); | |
873 | list->nmsrs = nmsrs.nmsrs; | |
fa3899ad | 874 | |
2128e30b SC |
875 | if (!feature_msrs) |
876 | kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); | |
877 | else | |
878 | kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); | |
c095cb60 SC |
879 | close(kvm_fd); |
880 | ||
881 | TEST_ASSERT(list->nmsrs == nmsrs.nmsrs, | |
2128e30b | 882 | "Number of MSRs in list changed, was %d, now %d", |
c095cb60 SC |
883 | nmsrs.nmsrs, list->nmsrs); |
884 | return list; | |
c90992bf AL |
885 | } |
886 | ||
2128e30b SC |
/*
 * Return the MSR index list, fetching it on the first call and handing back
 * the same cached pointer on every later call.
 */
const struct kvm_msr_list *kvm_get_msr_index_list(void)
{
	static const struct kvm_msr_list *cached;

	if (cached)
		return cached;

	cached = __kvm_get_msr_index_list(false);
	return cached;
}
895 | ||
896 | ||
/*
 * Return the feature MSR index list, fetching it on the first call and
 * handing back the same cached pointer on every later call.
 */
const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
{
	static const struct kvm_msr_list *cached;

	if (cached)
		return cached;

	cached = __kvm_get_msr_index_list(true);
	return cached;
}
905 | ||
c095cb60 | 906 | bool kvm_msr_is_in_save_restore_list(uint32_t msr_index) |
c90992bf | 907 | { |
c095cb60 SC |
908 | const struct kvm_msr_list *list = kvm_get_msr_index_list(); |
909 | int i; | |
c90992bf | 910 | |
c095cb60 SC |
911 | for (i = 0; i < list->nmsrs; ++i) { |
912 | if (list->indices[i] == msr_index) | |
913 | return true; | |
914 | } | |
c90992bf | 915 | |
c095cb60 | 916 | return false; |
c90992bf AL |
917 | } |
918 | ||
768e9a61 | 919 | static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu, |
6ebfef83 | 920 | struct kvm_x86_state *state) |
415a3c33 | 921 | { |
768e9a61 | 922 | int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2); |
415a3c33 | 923 | |
6ebfef83 SC |
924 | if (size) { |
925 | state->xsave = malloc(size); | |
768e9a61 | 926 | vcpu_xsave2_get(vcpu, state->xsave); |
6ebfef83 SC |
927 | } else { |
928 | state->xsave = malloc(sizeof(struct kvm_xsave)); | |
768e9a61 | 929 | vcpu_xsave_get(vcpu, state->xsave); |
6ebfef83 | 930 | } |
415a3c33 WW |
931 | } |
932 | ||
768e9a61 | 933 | struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu) |
fa3899ad | 934 | { |
c095cb60 | 935 | const struct kvm_msr_list *msr_list = kvm_get_msr_index_list(); |
fa3899ad | 936 | struct kvm_x86_state *state; |
6ebfef83 SC |
937 | int i; |
938 | ||
cb547637 PB |
939 | static int nested_size = -1; |
940 | ||
941 | if (nested_size == -1) { | |
942 | nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE); | |
943 | TEST_ASSERT(nested_size <= sizeof(state->nested_), | |
944 | "Nested state size too big, %i > %zi", | |
945 | nested_size, sizeof(state->nested_)); | |
946 | } | |
fa3899ad | 947 | |
c68c21ca PB |
948 | /* |
949 | * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees | |
950 | * guest state is consistent only after userspace re-enters the | |
951 | * kernel with KVM_RUN. Complete IO prior to migrating state | |
952 | * to a new VM. | |
953 | */ | |
768e9a61 | 954 | vcpu_run_complete_io(vcpu); |
c68c21ca | 955 | |
c095cb60 | 956 | state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0])); |
fa3899ad | 957 | |
768e9a61 SC |
958 | vcpu_events_get(vcpu, &state->events); |
959 | vcpu_mp_state_get(vcpu, &state->mp_state); | |
960 | vcpu_regs_get(vcpu, &state->regs); | |
961 | vcpu_save_xsave_state(vcpu, state); | |
6ebfef83 | 962 | |
9393cb13 | 963 | if (kvm_has_cap(KVM_CAP_XCRS)) |
768e9a61 | 964 | vcpu_xcrs_get(vcpu, &state->xcrs); |
6ebfef83 | 965 | |
768e9a61 | 966 | vcpu_sregs_get(vcpu, &state->sregs); |
fa3899ad | 967 | |
cb547637 PB |
968 | if (nested_size) { |
969 | state->nested.size = sizeof(state->nested_); | |
6ebfef83 | 970 | |
768e9a61 | 971 | vcpu_nested_state_get(vcpu, &state->nested); |
cb547637 | 972 | TEST_ASSERT(state->nested.size <= nested_size, |
96c852c8 PB |
973 | "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", |
974 | state->nested.size, nested_size); | |
6ebfef83 | 975 | } else { |
cb547637 | 976 | state->nested.size = 0; |
6ebfef83 | 977 | } |
cb547637 | 978 | |
c095cb60 SC |
979 | state->msrs.nmsrs = msr_list->nmsrs; |
980 | for (i = 0; i < msr_list->nmsrs; i++) | |
981 | state->msrs.entries[i].index = msr_list->indices[i]; | |
768e9a61 | 982 | vcpu_msrs_get(vcpu, &state->msrs); |
fa3899ad | 983 | |
768e9a61 | 984 | vcpu_debugregs_get(vcpu, &state->debugregs); |
fa3899ad | 985 | |
fa3899ad PB |
986 | return state; |
987 | } | |
988 | ||
768e9a61 | 989 | void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state) |
fa3899ad | 990 | { |
768e9a61 SC |
991 | vcpu_sregs_set(vcpu, &state->sregs); |
992 | vcpu_msrs_set(vcpu, &state->msrs); | |
fa3899ad | 993 | |
9393cb13 | 994 | if (kvm_has_cap(KVM_CAP_XCRS)) |
768e9a61 | 995 | vcpu_xcrs_set(vcpu, &state->xcrs); |
fa3899ad | 996 | |
768e9a61 SC |
997 | vcpu_xsave_set(vcpu, state->xsave); |
998 | vcpu_events_set(vcpu, &state->events); | |
999 | vcpu_mp_state_set(vcpu, &state->mp_state); | |
1000 | vcpu_debugregs_set(vcpu, &state->debugregs); | |
1001 | vcpu_regs_set(vcpu, &state->regs); | |
fa3899ad | 1002 | |
6ebfef83 | 1003 | if (state->nested.size) |
768e9a61 | 1004 | vcpu_nested_state_set(vcpu, &state->nested); |
fa3899ad | 1005 | } |
9dba988e | 1006 | |
415a3c33 WW |
1007 | void kvm_x86_state_cleanup(struct kvm_x86_state *state) |
1008 | { | |
1009 | free(state->xsave); | |
1010 | free(state); | |
1011 | } | |
1012 | ||
567a9f1e PX |
1013 | void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) |
1014 | { | |
40854713 | 1015 | if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) { |
1525429f | 1016 | *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32; |
567a9f1e PX |
1017 | *va_bits = 32; |
1018 | } else { | |
40854713 SC |
1019 | *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); |
1020 | *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR); | |
567a9f1e PX |
1021 | } |
1022 | } | |
29faeb96 | 1023 | |
29faeb96 AL |
1024 | static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, |
1025 | int dpl, unsigned short selector) | |
1026 | { | |
1027 | struct idt_entry *base = | |
1028 | (struct idt_entry *)addr_gva2hva(vm, vm->idt); | |
1029 | struct idt_entry *e = &base[vector]; | |
1030 | ||
1031 | memset(e, 0, sizeof(*e)); | |
1032 | e->offset0 = addr; | |
1033 | e->selector = selector; | |
1034 | e->ist = 0; | |
1035 | e->type = 14; | |
1036 | e->dpl = dpl; | |
1037 | e->p = 1; | |
1038 | e->offset1 = addr >> 16; | |
1039 | e->offset2 = addr >> 32; | |
1040 | } | |
1041 | ||
3b23054c SC |
1042 | |
1043 | static bool kvm_fixup_exception(struct ex_regs *regs) | |
1044 | { | |
1045 | if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10) | |
1046 | return false; | |
1047 | ||
1048 | if (regs->vector == DE_VECTOR) | |
1049 | return false; | |
1050 | ||
1051 | regs->rip = regs->r11; | |
1052 | regs->r9 = regs->vector; | |
b9635930 | 1053 | regs->r10 = regs->error_code; |
3b23054c SC |
1054 | return true; |
1055 | } | |
1056 | ||
29faeb96 AL |
1057 | void kvm_exit_unexpected_vector(uint32_t value) |
1058 | { | |
75275d7f | 1059 | ucall(UCALL_UNHANDLED, 1, value); |
29faeb96 AL |
1060 | } |
1061 | ||
1062 | void route_exception(struct ex_regs *regs) | |
1063 | { | |
1064 | typedef void(*handler)(struct ex_regs *); | |
1065 | handler *handlers = (handler *)exception_handlers; | |
1066 | ||
1067 | if (handlers && handlers[regs->vector]) { | |
1068 | handlers[regs->vector](regs); | |
1069 | return; | |
1070 | } | |
1071 | ||
3b23054c SC |
1072 | if (kvm_fixup_exception(regs)) |
1073 | return; | |
1074 | ||
29faeb96 AL |
1075 | kvm_exit_unexpected_vector(regs->vector); |
1076 | } | |
1077 | ||
1078 | void vm_init_descriptor_tables(struct kvm_vm *vm) | |
1079 | { | |
1080 | extern void *idt_handlers; | |
1081 | int i; | |
1082 | ||
1446e331 RK |
1083 | vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); |
1084 | vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); | |
29faeb96 AL |
1085 | /* Handlers have the same address in both address spaces.*/ |
1086 | for (i = 0; i < NUM_INTERRUPTS; i++) | |
1087 | set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, | |
1088 | DEFAULT_CODE_SELECTOR); | |
1089 | } | |
1090 | ||
768e9a61 | 1091 | void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) |
29faeb96 | 1092 | { |
768e9a61 | 1093 | struct kvm_vm *vm = vcpu->vm; |
29faeb96 AL |
1094 | struct kvm_sregs sregs; |
1095 | ||
768e9a61 | 1096 | vcpu_sregs_get(vcpu, &sregs); |
29faeb96 AL |
1097 | sregs.idt.base = vm->idt; |
1098 | sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; | |
1099 | sregs.gdt.base = vm->gdt; | |
1100 | sregs.gdt.limit = getpagesize() - 1; | |
1101 | kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); | |
768e9a61 | 1102 | vcpu_sregs_set(vcpu, &sregs); |
29faeb96 AL |
1103 | *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; |
1104 | } | |
1105 | ||
b78f4a59 RK |
1106 | void vm_install_exception_handler(struct kvm_vm *vm, int vector, |
1107 | void (*handler)(struct ex_regs *)) | |
29faeb96 AL |
1108 | { |
1109 | vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); | |
1110 | ||
1111 | handlers[vector] = (vm_vaddr_t)handler; | |
1112 | } | |
1113 | ||
768e9a61 | 1114 | void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) |
29faeb96 | 1115 | { |
75275d7f RK |
1116 | struct ucall uc; |
1117 | ||
768e9a61 | 1118 | if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) { |
75275d7f RK |
1119 | uint64_t vector = uc.args[0]; |
1120 | ||
1121 | TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)", | |
1122 | vector); | |
29faeb96 AL |
1123 | } |
1124 | } | |
ac4a4d6d | 1125 | |
813e38cd SC |
1126 | const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, |
1127 | uint32_t function, uint32_t index) | |
ecebb966 VK |
1128 | { |
1129 | int i; | |
1130 | ||
1131 | for (i = 0; i < cpuid->nent; i++) { | |
8b026741 SC |
1132 | if (cpuid->entries[i].function == function && |
1133 | cpuid->entries[i].index == index) | |
1134 | return &cpuid->entries[i]; | |
ecebb966 VK |
1135 | } |
1136 | ||
1137 | TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index); | |
1138 | ||
1139 | return NULL; | |
1140 | } | |
1141 | ||
4009e0bb SC |
/*
 * Execute a hypercall instruction and evaluate to the value left in RAX.
 * VMMCALL is executed when host_cpu_is_amd is non-zero, VMCALL otherwise.
 * @inputs are extra asm input operands supplied by the caller.
 */
#define X86_HYPERCALL(inputs...)					\
({									\
	uint64_t hc_ret;						\
									\
	asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t"		\
		     "jnz 1f\n\t"					\
		     "vmcall\n\t"					\
		     "jmp 2f\n\t"					\
		     "1: vmmcall\n\t"					\
		     "2:"						\
		     : "=a"(hc_ret)					\
		     : [use_vmmcall] "r" (host_cpu_is_amd), inputs);	\
									\
	hc_ret;								\
})
1157 | ||
ac4a4d6d OU |
1158 | uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, |
1159 | uint64_t a3) | |
1160 | { | |
4009e0bb | 1161 | return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3)); |
ac4a4d6d | 1162 | } |
32f00fd9 | 1163 | |
813e38cd | 1164 | const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) |
32f00fd9 VK |
1165 | { |
1166 | static struct kvm_cpuid2 *cpuid; | |
32f00fd9 VK |
1167 | int kvm_fd; |
1168 | ||
1169 | if (cpuid) | |
1170 | return cpuid; | |
1171 | ||
fc66963d | 1172 | cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); |
2aab4b35 | 1173 | kvm_fd = open_kvm_dev_path_or_exit(); |
32f00fd9 | 1174 | |
f9725f89 | 1175 | kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); |
32f00fd9 VK |
1176 | |
1177 | close(kvm_fd); | |
1178 | return cpuid; | |
1179 | } | |
1180 | ||
768e9a61 | 1181 | void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) |
8f014550 VK |
1182 | { |
1183 | static struct kvm_cpuid2 *cpuid_full; | |
813e38cd | 1184 | const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; |
8f014550 VK |
1185 | int i, nent = 0; |
1186 | ||
1187 | if (!cpuid_full) { | |
1188 | cpuid_sys = kvm_get_supported_cpuid(); | |
1189 | cpuid_hv = kvm_get_supported_hv_cpuid(); | |
1190 | ||
fc66963d | 1191 | cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); |
8f014550 VK |
1192 | if (!cpuid_full) { |
1193 | perror("malloc"); | |
1194 | abort(); | |
1195 | } | |
1196 | ||
1197 | /* Need to skip KVM CPUID leaves 0x400000xx */ | |
1198 | for (i = 0; i < cpuid_sys->nent; i++) { | |
1199 | if (cpuid_sys->entries[i].function >= 0x40000000 && | |
1200 | cpuid_sys->entries[i].function < 0x40000100) | |
1201 | continue; | |
1202 | cpuid_full->entries[nent] = cpuid_sys->entries[i]; | |
1203 | nent++; | |
1204 | } | |
1205 | ||
1206 | memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, | |
1207 | cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); | |
1208 | cpuid_full->nent = nent + cpuid_hv->nent; | |
1209 | } | |
1210 | ||
7fbc6038 | 1211 | vcpu_init_cpuid(vcpu, cpuid_full); |
8f014550 VK |
1212 | } |
1213 | ||
813e38cd | 1214 | const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) |
32f00fd9 | 1215 | { |
d838b313 | 1216 | struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); |
32f00fd9 | 1217 | |
768e9a61 | 1218 | vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); |
32f00fd9 VK |
1219 | |
1220 | return cpuid; | |
1221 | } | |
c8cc43c1 | 1222 | |
c8cc43c1 PB |
1223 | unsigned long vm_compute_max_gfn(struct kvm_vm *vm) |
1224 | { | |
1225 | const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ | |
1226 | unsigned long ht_gfn, max_gfn, max_pfn; | |
53a7dc0f | 1227 | uint8_t maxphyaddr; |
c8cc43c1 PB |
1228 | |
1229 | max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; | |
1230 | ||
1231 | /* Avoid reserved HyperTransport region on AMD processors. */ | |
e6df2ae3 | 1232 | if (!host_cpu_is_amd) |
c8cc43c1 PB |
1233 | return max_gfn; |
1234 | ||
1235 | /* On parts with <40 physical address bits, the area is fully hidden */ | |
1236 | if (vm->pa_bits < 40) | |
1237 | return max_gfn; | |
1238 | ||
1239 | /* Before family 17h, the HyperTransport area is just below 1T. */ | |
1240 | ht_gfn = (1 << 28) - num_ht_pages; | |
24f3f989 | 1241 | if (this_cpu_family() < 0x17) |
c8cc43c1 PB |
1242 | goto done; |
1243 | ||
1244 | /* | |
1245 | * Otherwise it's at the top of the physical address space, possibly | |
1246 | * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use | |
1247 | * the old conservative value if MAXPHYADDR is not enumerated. | |
1248 | */ | |
53a7dc0f | 1249 | if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) |
c8cc43c1 PB |
1250 | goto done; |
1251 | ||
53a7dc0f SC |
1252 | maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); |
1253 | max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1; | |
1254 | ||
1255 | if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION)) | |
1256 | max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION); | |
c8cc43c1 PB |
1257 | |
1258 | ht_gfn = max_pfn - num_ht_pages; | |
1259 | done: | |
1260 | return min(max_gfn, ht_gfn - 1); | |
1261 | } | |
b8592448 SC |
1262 | |
/*
 * Returns true if kvm_intel was loaded with unrestricted_guest=1.
 *
 * When @vm is NULL, the KVM device is opened and immediately closed first.
 */
bool vm_is_unrestricted_guest(struct kvm_vm *vm)
{
	/* Ensure that a KVM vendor-specific module is loaded. */
	if (!vm) {
		int kvm_fd = open_kvm_dev_path_or_exit();

		close(kvm_fd);
	}

	return get_kvm_intel_param_bool("unrestricted_guest");
}
e6df2ae3 VA |
1272 | |
1273 | void kvm_selftest_arch_init(void) | |
1274 | { | |
1275 | host_cpu_is_intel = this_cpu_is_intel(); | |
1276 | host_cpu_is_amd = this_cpu_is_amd(); | |
1277 | } |