Commit | Line | Data |
---|---|---|
13e48aa9 SC |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #define _GNU_SOURCE /* for program_invocation_short_name */ | |
3 | #include <fcntl.h> | |
4 | #include <pthread.h> | |
5 | #include <sched.h> | |
8a0639fe | 6 | #include <semaphore.h> |
13e48aa9 SC |
7 | #include <signal.h> |
8 | #include <stdio.h> | |
9 | #include <stdlib.h> | |
10 | #include <string.h> | |
11 | #include <sys/ioctl.h> | |
909e0aba | 12 | #include <sys/mman.h> |
13e48aa9 SC |
13 | |
14 | #include <linux/compiler.h> | |
15 | ||
16 | #include <test_util.h> | |
17 | #include <kvm_util.h> | |
18 | #include <processor.h> | |
19 | ||
909e0aba WSM |
/*
 * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
 * 2MB sized and aligned region so that the initial region corresponds to
 * exactly one large page.
 */
#define MEM_REGION_SIZE 0x200000

#ifdef __x86_64__
/*
 * Somewhat arbitrary location and slot, intended to not overlap anything.
 */
#define MEM_REGION_GPA 0xc0000000
#define MEM_REGION_SLOT 10

/*
 * Value vcpu_worker() copies into the MMIO data buffer when a guest read of
 * MEM_REGION_GPA exits to userspace as KVM_EXIT_MMIO.
 */
static const uint64_t MMIO_VAL = 0xbeefull;

/*
 * Defined in .rodata by the inline asm in guest_code_delete_memory_region().
 * Each holds the address of a code label; together they bracket the guest's
 * final spin loop and are compared against RIP in test_delete_memory_region().
 */
extern const uint64_t final_rip_start;
extern const uint64_t final_rip_end;

/*
 * Posted by vcpu_worker() each time the guest issues a UCALL_SYNC, and waited
 * on (with a timeout) by wait_for_vcpu().
 */
static sem_t vcpu_ready;
40 | ||
41 | static inline uint64_t guest_spin_on_val(uint64_t spin_val) | |
13e48aa9 SC |
42 | { |
43 | uint64_t val; | |
44 | ||
45 | do { | |
46 | val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA)); | |
8a0639fe | 47 | } while (val == spin_val); |
13e48aa9 | 48 | |
8a0639fe SC |
49 | GUEST_SYNC(0); |
50 | return val; | |
13e48aa9 SC |
51 | } |
52 | ||
53 | static void *vcpu_worker(void *data) | |
54 | { | |
d7828144 SC |
55 | struct kvm_vcpu *vcpu = data; |
56 | struct kvm_run *run = vcpu->run; | |
13e48aa9 SC |
57 | struct ucall uc; |
58 | uint64_t cmd; | |
59 | ||
60 | /* | |
61 | * Loop until the guest is done. Re-enter the guest on all MMIO exits, | |
8a0639fe SC |
62 | * which will occur if the guest attempts to access a memslot after it |
63 | * has been deleted or while it is being moved . | |
13e48aa9 | 64 | */ |
8a0639fe | 65 | while (1) { |
768e9a61 | 66 | vcpu_run(vcpu); |
13e48aa9 | 67 | |
8a0639fe | 68 | if (run->exit_reason == KVM_EXIT_IO) { |
768e9a61 | 69 | cmd = get_ucall(vcpu, &uc); |
8a0639fe SC |
70 | if (cmd != UCALL_SYNC) |
71 | break; | |
72 | ||
73 | sem_post(&vcpu_ready); | |
74 | continue; | |
75 | } | |
76 | ||
77 | if (run->exit_reason != KVM_EXIT_MMIO) | |
78 | break; | |
79 | ||
80 | TEST_ASSERT(!run->mmio.is_write, "Unexpected exit mmio write"); | |
81 | TEST_ASSERT(run->mmio.len == 8, | |
82 | "Unexpected exit mmio size = %u", run->mmio.len); | |
83 | ||
84 | TEST_ASSERT(run->mmio.phys_addr == MEM_REGION_GPA, | |
85 | "Unexpected exit mmio address = 0x%llx", | |
86 | run->mmio.phys_addr); | |
87 | memcpy(run->mmio.data, &MMIO_VAL, 8); | |
88 | } | |
89 | ||
90 | if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT) | |
594a1c27 | 91 | REPORT_GUEST_ASSERT_1(uc, "val = %lu"); |
13e48aa9 | 92 | |
13e48aa9 SC |
93 | return NULL; |
94 | } | |
95 | ||
8a0639fe SC |
96 | static void wait_for_vcpu(void) |
97 | { | |
98 | struct timespec ts; | |
99 | ||
100 | TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts), | |
101 | "clock_gettime() failed: %d\n", errno); | |
102 | ||
103 | ts.tv_sec += 2; | |
104 | TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts), | |
105 | "sem_timedwait() failed: %d\n", errno); | |
106 | ||
107 | /* Wait for the vCPU thread to reenter the guest. */ | |
108 | usleep(100000); | |
109 | } | |
110 | ||
d7828144 SC |
111 | static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread, |
112 | void *guest_code) | |
13e48aa9 | 113 | { |
13e48aa9 SC |
114 | struct kvm_vm *vm; |
115 | uint64_t *hva; | |
116 | uint64_t gpa; | |
117 | ||
d7828144 | 118 | vm = vm_create_with_one_vcpu(vcpu, guest_code); |
13e48aa9 | 119 | |
13e48aa9 SC |
120 | vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP, |
121 | MEM_REGION_GPA, MEM_REGION_SLOT, | |
122 | MEM_REGION_SIZE / getpagesize(), 0); | |
123 | ||
124 | /* | |
125 | * Allocate and map two pages so that the GPA accessed by guest_code() | |
126 | * stays valid across the memslot move. | |
127 | */ | |
128 | gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT); | |
129 | TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); | |
130 | ||
4307af73 | 131 | virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2); |
13e48aa9 SC |
132 | |
133 | /* Ditto for the host mapping so that both pages can be zeroed. */ | |
134 | hva = addr_gpa2hva(vm, MEM_REGION_GPA); | |
135 | memset(hva, 0, 2 * 4096); | |
136 | ||
d7828144 | 137 | pthread_create(vcpu_thread, NULL, vcpu_worker, *vcpu); |
13e48aa9 SC |
138 | |
139 | /* Ensure the guest thread is spun up. */ | |
8a0639fe SC |
140 | wait_for_vcpu(); |
141 | ||
142 | return vm; | |
143 | } | |
144 | ||
145 | ||
146 | static void guest_code_move_memory_region(void) | |
147 | { | |
148 | uint64_t val; | |
149 | ||
150 | GUEST_SYNC(0); | |
151 | ||
152 | /* | |
0c55f867 MS |
153 | * Spin until the memory region starts getting moved to a |
154 | * misaligned address. | |
155 | * Every region move may or may not trigger MMIO, as the | |
156 | * window where the memslot is invalid is usually quite small. | |
8a0639fe SC |
157 | */ |
158 | val = guest_spin_on_val(0); | |
159 | GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val); | |
160 | ||
0c55f867 MS |
161 | /* Spin until the misaligning memory region move completes. */ |
162 | val = guest_spin_on_val(MMIO_VAL); | |
163 | GUEST_ASSERT_1(val == 1 || val == 0, val); | |
164 | ||
165 | /* Spin until the memory region starts to get re-aligned. */ | |
166 | val = guest_spin_on_val(0); | |
167 | GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val); | |
168 | ||
169 | /* Spin until the re-aligning memory region move completes. */ | |
8a0639fe SC |
170 | val = guest_spin_on_val(MMIO_VAL); |
171 | GUEST_ASSERT_1(val == 1, val); | |
172 | ||
173 | GUEST_DONE(); | |
174 | } | |
175 | ||
176 | static void test_move_memory_region(void) | |
177 | { | |
178 | pthread_t vcpu_thread; | |
d7828144 | 179 | struct kvm_vcpu *vcpu; |
8a0639fe SC |
180 | struct kvm_vm *vm; |
181 | uint64_t *hva; | |
182 | ||
d7828144 | 183 | vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region); |
8a0639fe SC |
184 | |
185 | hva = addr_gpa2hva(vm, MEM_REGION_GPA); | |
13e48aa9 SC |
186 | |
187 | /* | |
188 | * Shift the region's base GPA. The guest should not see "2" as the | |
189 | * hva->gpa translation is misaligned, i.e. the guest is accessing a | |
190 | * different host pfn. | |
191 | */ | |
192 | vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA - 4096); | |
193 | WRITE_ONCE(*hva, 2); | |
194 | ||
8a0639fe SC |
195 | /* |
196 | * The guest _might_ see an invalid memslot and trigger MMIO, but it's | |
197 | * a tiny window. Spin and defer the sync until the memslot is | |
198 | * restored and guest behavior is once again deterministic. | |
199 | */ | |
13e48aa9 SC |
200 | usleep(100000); |
201 | ||
202 | /* | |
203 | * Note, value in memory needs to be changed *before* restoring the | |
204 | * memslot, else the guest could race the update and see "2". | |
205 | */ | |
206 | WRITE_ONCE(*hva, 1); | |
207 | ||
208 | /* Restore the original base, the guest should see "1". */ | |
209 | vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA); | |
8a0639fe SC |
210 | wait_for_vcpu(); |
211 | /* Defered sync from when the memslot was misaligned (above). */ | |
212 | wait_for_vcpu(); | |
13e48aa9 SC |
213 | |
214 | pthread_join(vcpu_thread, NULL); | |
215 | ||
216 | kvm_vm_free(vm); | |
217 | } | |
218 | ||
8fb38f05 SC |
219 | static void guest_code_delete_memory_region(void) |
220 | { | |
221 | uint64_t val; | |
222 | ||
223 | GUEST_SYNC(0); | |
224 | ||
225 | /* Spin until the memory region is deleted. */ | |
226 | val = guest_spin_on_val(0); | |
227 | GUEST_ASSERT_1(val == MMIO_VAL, val); | |
228 | ||
229 | /* Spin until the memory region is recreated. */ | |
230 | val = guest_spin_on_val(MMIO_VAL); | |
231 | GUEST_ASSERT_1(val == 0, val); | |
232 | ||
233 | /* Spin until the memory region is deleted. */ | |
234 | val = guest_spin_on_val(0); | |
235 | GUEST_ASSERT_1(val == MMIO_VAL, val); | |
236 | ||
237 | asm("1:\n\t" | |
238 | ".pushsection .rodata\n\t" | |
239 | ".global final_rip_start\n\t" | |
240 | "final_rip_start: .quad 1b\n\t" | |
241 | ".popsection"); | |
242 | ||
243 | /* Spin indefinitely (until the code memslot is deleted). */ | |
244 | guest_spin_on_val(MMIO_VAL); | |
245 | ||
246 | asm("1:\n\t" | |
247 | ".pushsection .rodata\n\t" | |
248 | ".global final_rip_end\n\t" | |
249 | "final_rip_end: .quad 1b\n\t" | |
250 | ".popsection"); | |
251 | ||
252 | GUEST_ASSERT_1(0, 0); | |
253 | } | |
254 | ||
255 | static void test_delete_memory_region(void) | |
256 | { | |
257 | pthread_t vcpu_thread; | |
d7828144 | 258 | struct kvm_vcpu *vcpu; |
8fb38f05 SC |
259 | struct kvm_regs regs; |
260 | struct kvm_run *run; | |
261 | struct kvm_vm *vm; | |
262 | ||
d7828144 | 263 | vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_delete_memory_region); |
8fb38f05 SC |
264 | |
265 | /* Delete the memory region, the guest should not die. */ | |
266 | vm_mem_region_delete(vm, MEM_REGION_SLOT); | |
267 | wait_for_vcpu(); | |
268 | ||
269 | /* Recreate the memory region. The guest should see "0". */ | |
270 | vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP, | |
271 | MEM_REGION_GPA, MEM_REGION_SLOT, | |
272 | MEM_REGION_SIZE / getpagesize(), 0); | |
273 | wait_for_vcpu(); | |
274 | ||
275 | /* Delete the region again so that there's only one memslot left. */ | |
276 | vm_mem_region_delete(vm, MEM_REGION_SLOT); | |
277 | wait_for_vcpu(); | |
278 | ||
279 | /* | |
280 | * Delete the primary memslot. This should cause an emulation error or | |
281 | * shutdown due to the page tables getting nuked. | |
282 | */ | |
283 | vm_mem_region_delete(vm, 0); | |
284 | ||
285 | pthread_join(vcpu_thread, NULL); | |
286 | ||
d7828144 | 287 | run = vcpu->run; |
8fb38f05 SC |
288 | |
289 | TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN || | |
290 | run->exit_reason == KVM_EXIT_INTERNAL_ERROR, | |
291 | "Unexpected exit reason = %d", run->exit_reason); | |
292 | ||
768e9a61 | 293 | vcpu_regs_get(vcpu, ®s); |
8fb38f05 | 294 | |
1d2c6c9b PB |
295 | /* |
296 | * On AMD, after KVM_EXIT_SHUTDOWN the VMCB has been reinitialized already, | |
297 | * so the instruction pointer would point to the reset vector. | |
298 | */ | |
299 | if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR) | |
300 | TEST_ASSERT(regs.rip >= final_rip_start && | |
301 | regs.rip < final_rip_end, | |
302 | "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n", | |
303 | final_rip_start, final_rip_end, regs.rip); | |
8fb38f05 SC |
304 | |
305 | kvm_vm_free(vm); | |
306 | } | |
307 | ||
8cc2dd63 SC |
308 | static void test_zero_memory_regions(void) |
309 | { | |
d7828144 | 310 | struct kvm_vcpu *vcpu; |
8cc2dd63 SC |
311 | struct kvm_vm *vm; |
312 | ||
313 | pr_info("Testing KVM_RUN with zero added memory regions\n"); | |
314 | ||
95fb0460 | 315 | vm = vm_create_barebones(); |
f742d94f | 316 | vcpu = __vm_vcpu_add(vm, 0); |
8cc2dd63 | 317 | |
10825b55 | 318 | vm_ioctl(vm, KVM_SET_NR_MMU_PAGES, (void *)64ul); |
768e9a61 | 319 | vcpu_run(vcpu); |
c96f57b0 | 320 | TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); |
8cc2dd63 SC |
321 | |
322 | kvm_vm_free(vm); | |
323 | } | |
5b4f758f | 324 | #endif /* __x86_64__ */ |
8cc2dd63 | 325 | |
909e0aba WSM |
326 | /* |
327 | * Test it can be added memory slots up to KVM_CAP_NR_MEMSLOTS, then any | |
328 | * tentative to add further slots should fail. | |
329 | */ | |
330 | static void test_add_max_memory_regions(void) | |
331 | { | |
332 | int ret; | |
333 | struct kvm_vm *vm; | |
334 | uint32_t max_mem_slots; | |
335 | uint32_t slot; | |
3bf0fcd7 VK |
336 | void *mem, *mem_aligned, *mem_extra; |
337 | size_t alignment; | |
338 | ||
339 | #ifdef __s390x__ | |
340 | /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ | |
341 | alignment = 0x100000; | |
342 | #else | |
343 | alignment = 1; | |
344 | #endif | |
909e0aba WSM |
345 | |
346 | max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); | |
347 | TEST_ASSERT(max_mem_slots > 0, | |
348 | "KVM_CAP_NR_MEMSLOTS should be greater than 0"); | |
349 | pr_info("Allowed number of memory slots: %i\n", max_mem_slots); | |
350 | ||
95fb0460 | 351 | vm = vm_create_barebones(); |
909e0aba | 352 | |
909e0aba WSM |
353 | /* Check it can be added memory slots up to the maximum allowed */ |
354 | pr_info("Adding slots 0..%i, each memory region with %dK size\n", | |
355 | (max_mem_slots - 1), MEM_REGION_SIZE >> 10); | |
3bf0fcd7 | 356 | |
309505dd | 357 | mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, |
cd4220d2 CB |
358 | PROT_READ | PROT_WRITE, |
359 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); | |
3bf0fcd7 VK |
360 | TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); |
361 | mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); | |
362 | ||
3d7d6043 SC |
363 | for (slot = 0; slot < max_mem_slots; slot++) |
364 | vm_set_user_memory_region(vm, slot, 0, | |
365 | ((uint64_t)slot * MEM_REGION_SIZE), | |
366 | MEM_REGION_SIZE, | |
367 | mem_aligned + (uint64_t)slot * MEM_REGION_SIZE); | |
909e0aba WSM |
368 | |
369 | /* Check it cannot be added memory slots beyond the limit */ | |
3bf0fcd7 VK |
370 | mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE, |
371 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); | |
372 | TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host"); | |
909e0aba | 373 | |
3d7d6043 SC |
374 | ret = __vm_set_user_memory_region(vm, max_mem_slots, 0, |
375 | (uint64_t)max_mem_slots * MEM_REGION_SIZE, | |
376 | MEM_REGION_SIZE, mem_extra); | |
909e0aba WSM |
377 | TEST_ASSERT(ret == -1 && errno == EINVAL, |
378 | "Adding one more memory slot should fail with EINVAL"); | |
379 | ||
309505dd | 380 | munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); |
3bf0fcd7 | 381 | munmap(mem_extra, MEM_REGION_SIZE); |
909e0aba WSM |
382 | kvm_vm_free(vm); |
383 | } | |
384 | ||
13e48aa9 SC |
/*
 * Test entry point.
 *
 * The max-memslots test runs on every architecture.  The zero-memslot, MOVE
 * and DELETE tests are x86_64 only.  An optional first command line argument
 * gives the number of MOVE/DELETE iterations (default 10).
 */
int main(int argc, char *argv[])
{
#ifdef __x86_64__
	int i, loops;

	/*
	 * FIXME: the zero-memslot test fails on aarch64 and s390x because
	 * KVM_RUN fails with ENOEXEC or EFAULT.
	 */
	test_zero_memory_regions();
#endif

	test_add_max_memory_regions();

#ifdef __x86_64__
	loops = (argc > 1) ? atoi_positive("Number of iterations", argv[1]) : 10;

	pr_info("Testing MOVE of in-use region, %d loops\n", loops);
	for (i = 0; i < loops; i++)
		test_move_memory_region();

	pr_info("Testing DELETE of in-use region, %d loops\n", loops);
	for (i = 0; i < loops; i++)
		test_delete_memory_region();
#endif

	return 0;
}