Commit | Line | Data |
---|---|---|
982ed0de DW |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* | |
3 | * Kernel-based Virtual Machine driver for Linux | |
4 | * | |
5 | * This module enables kernel and guest-mode vCPU access to guest physical | |
6 | * memory with suitable invalidation mechanisms. | |
7 | * | |
8 | * Copyright © 2021 Amazon.com, Inc. or its affiliates. | |
9 | * | |
10 | * Authors: | |
11 | * David Woodhouse <dwmw2@infradead.org> | |
12 | */ | |
13 | ||
14 | #include <linux/kvm_host.h> | |
15 | #include <linux/kvm.h> | |
16 | #include <linux/highmem.h> | |
17 | #include <linux/module.h> | |
18 | #include <linux/errno.h> | |
19 | ||
20 | #include "kvm_mm.h" | |
21 | ||
/*
 * MMU notifier 'invalidate_range_start' hook: invalidate every pfn cache
 * whose cached userspace HVA falls within [start, end).
 *
 * @kvm:       the VM whose gpc_list is walked.
 * @start/@end: host virtual address range being invalidated (end exclusive).
 * @may_block: unused here; part of the notifier hook signature.
 */
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
				       unsigned long end, bool may_block)
{
	struct gfn_to_pfn_cache *gpc;

	/* gpc_lock protects the list itself; each cache has its own rwlock. */
	spin_lock(&kvm->gpc_lock);
	list_for_each_entry(gpc, &kvm->gpc_list, list) {
		/*
		 * Take the cheap read lock first; most caches won't intersect
		 * the invalidated range and can be skipped without contending
		 * on the write lock.
		 */
		read_lock_irq(&gpc->lock);

		/* Only a single page so no need to care about length */
		if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
		    gpc->uhva >= start && gpc->uhva < end) {
			read_unlock_irq(&gpc->lock);

			/*
			 * There is a small window here where the cache could
			 * be modified, and invalidation would no longer be
			 * necessary. Hence check again whether invalidation
			 * is still necessary once the write lock has been
			 * acquired.
			 */

			write_lock_irq(&gpc->lock);
			if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
			    gpc->uhva >= start && gpc->uhva < end)
				gpc->valid = false;
			write_unlock_irq(&gpc->lock);
			continue;
		}

		read_unlock_irq(&gpc->lock);
	}
	spin_unlock(&kvm->gpc_lock);
}
59 | ||
18f06e97 SC |
60 | static bool kvm_gpc_is_valid_len(gpa_t gpa, unsigned long uhva, |
61 | unsigned long len) | |
62 | { | |
63 | unsigned long offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) : | |
64 | offset_in_page(gpa); | |
65 | ||
66 | /* | |
67 | * The cached access must fit within a single page. The 'len' argument | |
68 | * to activate() and refresh() exists only to enforce that. | |
69 | */ | |
70 | return offset + len <= PAGE_SIZE; | |
71 | } | |
72 | ||
58f5ee5f | 73 | bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len) |
982ed0de | 74 | { |
e308c24a | 75 | struct kvm_memslots *slots = kvm_memslots(gpc->kvm); |
982ed0de | 76 | |
ecbcf030 SC |
77 | if (!gpc->active) |
78 | return false; | |
79 | ||
721f5b0d PD |
80 | /* |
81 | * If the page was cached from a memslot, make sure the memslots have | |
82 | * not been re-configured. | |
83 | */ | |
84 | if (!kvm_is_error_gpa(gpc->gpa) && gpc->generation != slots->generation) | |
85 | return false; | |
86 | ||
87 | if (kvm_is_error_hva(gpc->uhva)) | |
982ed0de DW |
88 | return false; |
89 | ||
18f06e97 | 90 | if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len)) |
982ed0de DW |
91 | return false; |
92 | ||
93 | if (!gpc->valid) | |
94 | return false; | |
95 | ||
96 | return true; | |
97 | } | |
982ed0de | 98 | |
f39b80e3 | 99 | static void *gpc_map(kvm_pfn_t pfn) |
982ed0de | 100 | { |
f39b80e3 PD |
101 | if (pfn_valid(pfn)) |
102 | return kmap(pfn_to_page(pfn)); | |
103 | ||
982ed0de | 104 | #ifdef CONFIG_HAS_IOMEM |
f39b80e3 PD |
105 | return memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB); |
106 | #else | |
107 | return NULL; | |
982ed0de | 108 | #endif |
f39b80e3 PD |
109 | } |
110 | ||
111 | static void gpc_unmap(kvm_pfn_t pfn, void *khva) | |
112 | { | |
113 | /* Unmap the old pfn/page if it was mapped before. */ | |
114 | if (is_error_noslot_pfn(pfn) || !khva) | |
115 | return; | |
116 | ||
117 | if (pfn_valid(pfn)) { | |
118 | kunmap(pfn_to_page(pfn)); | |
119 | return; | |
982ed0de | 120 | } |
f39b80e3 PD |
121 | |
122 | #ifdef CONFIG_HAS_IOMEM | |
123 | memunmap(khva); | |
124 | #endif | |
982ed0de DW |
125 | } |
126 | ||
/*
 * Return true if the pfn looked up during the current refresh iteration may
 * have been invalidated by a concurrent mmu_notifier event, in which case
 * the refresh loop must retry.  @mmu_seq is the snapshot of
 * kvm->mmu_invalidate_seq taken before the pfn lookup.
 */
static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq)
{
	/*
	 * mn_active_invalidate_count acts for all intents and purposes
	 * like mmu_invalidate_in_progress here; but the latter cannot
	 * be used here because the invalidation of caches in the
	 * mmu_notifier event occurs _before_ mmu_invalidate_in_progress
	 * is elevated.
	 *
	 * Note, it does not matter that mn_active_invalidate_count
	 * is not protected by gpc->lock.  It is guaranteed to
	 * be elevated before the mmu_notifier acquires gpc->lock, and
	 * isn't dropped until after mmu_invalidate_seq is updated.
	 */
	if (kvm->mn_active_invalidate_count)
		return true;

	/*
	 * Ensure mn_active_invalidate_count is read before
	 * mmu_invalidate_seq.  This pairs with the smp_wmb() in
	 * mmu_notifier_invalidate_range_end() to guarantee either the
	 * old (non-zero) value of mn_active_invalidate_count or the
	 * new (incremented) value of mmu_invalidate_seq is observed.
	 */
	smp_rmb();
	return kvm->mmu_invalidate_seq != mmu_seq;
}
154 | ||
2a0b128a | 155 | static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc) |
58cd407c SC |
156 | { |
157 | /* Note, the new page offset may be different than the old! */ | |
406c1096 | 158 | void *old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva); |
58cd407c SC |
159 | kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT; |
160 | void *new_khva = NULL; | |
982ed0de | 161 | unsigned long mmu_seq; |
58cd407c SC |
162 | |
163 | lockdep_assert_held(&gpc->refresh_lock); | |
164 | ||
165 | lockdep_assert_held_write(&gpc->lock); | |
166 | ||
167 | /* | |
168 | * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva | |
169 | * assets have already been updated and so a concurrent check() from a | |
170 | * different task may not fail the gpa/uhva/generation checks. | |
171 | */ | |
172 | gpc->valid = false; | |
982ed0de DW |
173 | |
174 | do { | |
2a0b128a | 175 | mmu_seq = gpc->kvm->mmu_invalidate_seq; |
982ed0de DW |
176 | smp_rmb(); |
177 | ||
58cd407c SC |
178 | write_unlock_irq(&gpc->lock); |
179 | ||
180 | /* | |
181 | * If the previous iteration "failed" due to an mmu_notifier | |
182 | * event, release the pfn and unmap the kernel virtual address | |
183 | * from the previous attempt. Unmapping might sleep, so this | |
184 | * needs to be done after dropping the lock. Opportunistically | |
185 | * check for resched while the lock isn't held. | |
186 | */ | |
187 | if (new_pfn != KVM_PFN_ERR_FAULT) { | |
188 | /* | |
189 | * Keep the mapping if the previous iteration reused | |
190 | * the existing mapping and didn't create a new one. | |
191 | */ | |
85165781 | 192 | if (new_khva != old_khva) |
f39b80e3 | 193 | gpc_unmap(new_pfn, new_khva); |
58cd407c | 194 | |
85165781 | 195 | kvm_release_pfn_clean(new_pfn); |
58cd407c SC |
196 | |
197 | cond_resched(); | |
198 | } | |
199 | ||
982ed0de | 200 | /* We always request a writeable mapping */ |
c8b88b33 | 201 | new_pfn = hva_to_pfn(gpc->uhva, false, false, NULL, true, NULL); |
982ed0de | 202 | if (is_error_noslot_pfn(new_pfn)) |
58cd407c | 203 | goto out_error; |
982ed0de | 204 | |
58cd407c SC |
205 | /* |
206 | * Obtain a new kernel mapping if KVM itself will access the | |
207 | * pfn. Note, kmap() and memremap() can both sleep, so this | |
208 | * too must be done outside of gpc->lock! | |
209 | */ | |
a4bff3df PD |
210 | if (new_pfn == gpc->pfn) |
211 | new_khva = old_khva; | |
212 | else | |
213 | new_khva = gpc_map(new_pfn); | |
214 | ||
215 | if (!new_khva) { | |
216 | kvm_release_pfn_clean(new_pfn); | |
217 | goto out_error; | |
58cd407c | 218 | } |
982ed0de | 219 | |
58cd407c | 220 | write_lock_irq(&gpc->lock); |
982ed0de | 221 | |
58cd407c SC |
222 | /* |
223 | * Other tasks must wait for _this_ refresh to complete before | |
224 | * attempting to refresh. | |
225 | */ | |
226 | WARN_ON_ONCE(gpc->valid); | |
2a0b128a | 227 | } while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq)); |
58cd407c SC |
228 | |
229 | gpc->valid = true; | |
230 | gpc->pfn = new_pfn; | |
406c1096 | 231 | gpc->khva = new_khva + offset_in_page(gpc->uhva); |
85165781 SC |
232 | |
233 | /* | |
234 | * Put the reference to the _new_ pfn. The pfn is now tracked by the | |
235 | * cache and can be safely migrated, swapped, etc... as the cache will | |
236 | * invalidate any mappings in response to relevant mmu_notifier events. | |
237 | */ | |
238 | kvm_release_pfn_clean(new_pfn); | |
239 | ||
58cd407c SC |
240 | return 0; |
241 | ||
242 | out_error: | |
243 | write_lock_irq(&gpc->lock); | |
244 | ||
245 | return -EFAULT; | |
982ed0de DW |
246 | } |
247 | ||
/*
 * Re-validate the cache's GPA=>HVA=>PFN translations for an access of @len
 * bytes.  Exactly one of @gpa / @uhva must be valid: a GPA-based cache
 * resolves the HVA through the memslots, an HVA-based cache uses @uhva
 * directly.  The pfn is only re-looked-up if the effective HVA changed or
 * the cache was invalid; otherwise only the in-page offset is refreshed.
 *
 * Returns 0 on success, -EINVAL on bad arguments or an inactive cache,
 * -EFAULT if the HVA cannot be resolved or mapped.
 */
static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva,
			     unsigned long len)
{
	unsigned long page_offset;
	bool unmap_old = false;
	unsigned long old_uhva;
	kvm_pfn_t old_pfn;
	bool hva_change = false;
	void *old_khva;
	int ret;

	/* Either gpa or uhva must be valid, but not both */
	if (WARN_ON_ONCE(kvm_is_error_gpa(gpa) == kvm_is_error_hva(uhva)))
		return -EINVAL;

	if (!kvm_gpc_is_valid_len(gpa, uhva, len))
		return -EINVAL;

	lockdep_assert_held(&gpc->refresh_lock);

	write_lock_irq(&gpc->lock);

	if (!gpc->active) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Snapshot the old mapping so it can be torn down after unlocking. */
	old_pfn = gpc->pfn;
	old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva);
	old_uhva = PAGE_ALIGN_DOWN(gpc->uhva);

	if (kvm_is_error_gpa(gpa)) {
		/* HVA-based cache: no memslot involvement. */
		page_offset = offset_in_page(uhva);

		gpc->gpa = INVALID_GPA;
		gpc->memslot = NULL;
		gpc->uhva = PAGE_ALIGN_DOWN(uhva);

		if (gpc->uhva != old_uhva)
			hva_change = true;
	} else {
		struct kvm_memslots *slots = kvm_memslots(gpc->kvm);

		page_offset = offset_in_page(gpa);

		/* Re-resolve the HVA if the GPA or the memslots changed. */
		if (gpc->gpa != gpa || gpc->generation != slots->generation ||
		    kvm_is_error_hva(gpc->uhva)) {
			gfn_t gfn = gpa_to_gfn(gpa);

			gpc->gpa = gpa;
			gpc->generation = slots->generation;
			gpc->memslot = __gfn_to_memslot(slots, gfn);
			gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);

			if (kvm_is_error_hva(gpc->uhva)) {
				ret = -EFAULT;
				goto out;
			}

			/*
			 * Even if the GPA and/or the memslot generation changed, the
			 * HVA may still be the same.
			 */
			if (gpc->uhva != old_uhva)
				hva_change = true;
		} else {
			gpc->uhva = old_uhva;
		}
	}

	/* Note: the offset must be correct before calling hva_to_pfn_retry() */
	gpc->uhva += page_offset;

	/*
	 * If the userspace HVA changed or the PFN was already invalid,
	 * drop the lock and do the HVA to PFN lookup again.
	 */
	if (!gpc->valid || hva_change) {
		ret = hva_to_pfn_retry(gpc);
	} else {
		/*
		 * If the HVA→PFN mapping was already valid, don't unmap it.
		 * But do update gpc->khva because the offset within the page
		 * may have changed.
		 */
		gpc->khva = old_khva + page_offset;
		ret = 0;
		goto out_unlock;
	}

out:
	/*
	 * Invalidate the cache and purge the pfn/khva if the refresh failed.
	 * Some/all of the uhva, gpa, and memslot generation info may still be
	 * valid, leave it as is.
	 */
	if (ret) {
		gpc->valid = false;
		gpc->pfn = KVM_PFN_ERR_FAULT;
		gpc->khva = NULL;
	}

	/* Detect a pfn change before dropping the lock! */
	unmap_old = (old_pfn != gpc->pfn);

out_unlock:
	write_unlock_irq(&gpc->lock);

	/* Unmapping may sleep, so it must happen after dropping gpc->lock. */
	if (unmap_old)
		gpc_unmap(old_pfn, old_khva);

	return ret;
}
58f5ee5f SC |
361 | |
362 | int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len) | |
363 | { | |
6addfcf2 DW |
364 | unsigned long uhva; |
365 | ||
366 | guard(mutex)(&gpc->refresh_lock); | |
367 | ||
721f5b0d PD |
368 | /* |
369 | * If the GPA is valid then ignore the HVA, as a cache can be GPA-based | |
370 | * or HVA-based, not both. For GPA-based caches, the HVA will be | |
371 | * recomputed during refresh if necessary. | |
372 | */ | |
6addfcf2 | 373 | uhva = kvm_is_error_gpa(gpc->gpa) ? gpc->uhva : KVM_HVA_ERR_BAD; |
721f5b0d PD |
374 | |
375 | return __kvm_gpc_refresh(gpc, gpc->gpa, uhva, len); | |
58f5ee5f | 376 | } |
982ed0de | 377 | |
a4bff3df | 378 | void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm) |
52491a38 ML |
379 | { |
380 | rwlock_init(&gpc->lock); | |
381 | mutex_init(&gpc->refresh_lock); | |
8c82a0b3 ML |
382 | |
383 | gpc->kvm = kvm; | |
5762cb10 | 384 | gpc->pfn = KVM_PFN_ERR_FAULT; |
721f5b0d | 385 | gpc->gpa = INVALID_GPA; |
5762cb10 | 386 | gpc->uhva = KVM_HVA_ERR_BAD; |
6addfcf2 | 387 | gpc->active = gpc->valid = false; |
52491a38 | 388 | } |
982ed0de | 389 | |
721f5b0d PD |
390 | static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva, |
391 | unsigned long len) | |
982ed0de | 392 | { |
8c82a0b3 | 393 | struct kvm *kvm = gpc->kvm; |
d0d96121 | 394 | |
6addfcf2 DW |
395 | guard(mutex)(&gpc->refresh_lock); |
396 | ||
982ed0de | 397 | if (!gpc->active) { |
5762cb10 SC |
398 | if (KVM_BUG_ON(gpc->valid, kvm)) |
399 | return -EIO; | |
982ed0de DW |
400 | |
401 | spin_lock(&kvm->gpc_lock); | |
402 | list_add(&gpc->list, &kvm->gpc_list); | |
403 | spin_unlock(&kvm->gpc_lock); | |
ecbcf030 SC |
404 | |
405 | /* | |
406 | * Activate the cache after adding it to the list, a concurrent | |
407 | * refresh must not establish a mapping until the cache is | |
408 | * reachable by mmu_notifier events. | |
409 | */ | |
410 | write_lock_irq(&gpc->lock); | |
411 | gpc->active = true; | |
412 | write_unlock_irq(&gpc->lock); | |
982ed0de | 413 | } |
721f5b0d PD |
414 | return __kvm_gpc_refresh(gpc, gpa, uhva, len); |
415 | } | |
416 | ||
/* Activate a GPA-based cache: the HVA is resolved through the memslots. */
int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
{
	return __kvm_gpc_activate(gpc, gpa, KVM_HVA_ERR_BAD, len);
}
421 | ||
/* Activate an HVA-based cache: no GPA/memslot involvement. */
int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long uhva, unsigned long len)
{
	return __kvm_gpc_activate(gpc, INVALID_GPA, uhva, len);
}
982ed0de | 426 | |
/*
 * Deactivate the cache: invalidate it, drop the mapped pfn/khva, and remove
 * it from the VM's gpc_list so mmu_notifier events no longer see it.  A
 * no-op if the cache was never activated.
 */
void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
{
	struct kvm *kvm = gpc->kvm;
	kvm_pfn_t old_pfn;
	void *old_khva;

	guard(mutex)(&gpc->refresh_lock);

	if (gpc->active) {
		/*
		 * Deactivate the cache before removing it from the list, KVM
		 * must stall mmu_notifier events until all users go away, i.e.
		 * until gpc->lock is dropped and refresh is guaranteed to fail.
		 */
		write_lock_irq(&gpc->lock);
		gpc->active = false;
		gpc->valid = false;

		/*
		 * Leave the GPA => uHVA cache intact, it's protected by the
		 * memslot generation.  The PFN lookup needs to be redone every
		 * time as mmu_notifier protection is lost when the cache is
		 * removed from the VM's gpc_list.
		 */
		old_khva = gpc->khva - offset_in_page(gpc->khva);
		gpc->khva = NULL;

		old_pfn = gpc->pfn;
		gpc->pfn = KVM_PFN_ERR_FAULT;
		write_unlock_irq(&gpc->lock);

		spin_lock(&kvm->gpc_lock);
		list_del(&gpc->list);
		spin_unlock(&kvm->gpc_lock);

		/* Unmapping may sleep, do it after all locks are dropped. */
		gpc_unmap(old_pfn, old_khva);
	}
}