// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables kernel and guest-mode vCPU access to guest physical
 * memory with suitable invalidation mechanisms.
 *
 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
 *
 * Authors:
 *   David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/errno.h>

#include "kvm_mm.h"

/*
 * MMU notifier 'invalidate_range_start' hook.
 */
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
				       unsigned long end)
{
	struct gfn_to_pfn_cache *gpc;

	spin_lock(&kvm->gpc_lock);
	list_for_each_entry(gpc, &kvm->gpc_list, list) {
		read_lock_irq(&gpc->lock);

		/* Only a single page so no need to care about length */
		if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
		    gpc->uhva >= start && gpc->uhva < end) {
			read_unlock_irq(&gpc->lock);

			/*
			 * There is a small window here where the cache could
			 * be modified, and invalidation would no longer be
			 * necessary. Hence check again whether invalidation
			 * is still necessary once the write lock has been
			 * acquired.
			 */

			write_lock_irq(&gpc->lock);
			if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
			    gpc->uhva >= start && gpc->uhva < end)
				gpc->valid = false;
			write_unlock_irq(&gpc->lock);
			continue;
		}

		read_unlock_irq(&gpc->lock);
	}
	spin_unlock(&kvm->gpc_lock);
}

static bool kvm_gpc_is_valid_len(gpa_t gpa, unsigned long uhva,
				 unsigned long len)
{
	unsigned long offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) :
						       offset_in_page(gpa);

	/*
	 * The cached access must fit within a single page. The 'len' argument
	 * to activate() and refresh() exists only to enforce that.
	 */
	return offset + len <= PAGE_SIZE;
}

bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
{
	struct kvm_memslots *slots = kvm_memslots(gpc->kvm);

	if (!gpc->active)
		return false;

	/*
	 * If the page was cached from a memslot, make sure the memslots have
	 * not been re-configured.
	 */
	if (!kvm_is_error_gpa(gpc->gpa) && gpc->generation != slots->generation)
		return false;

	if (kvm_is_error_hva(gpc->uhva))
		return false;

	if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len))
		return false;

	if (!gpc->valid)
		return false;

	return true;
}
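
/*
 * Illustrative sketch, not part of this file: the intended consumer-side
 * pattern for the API above.  The mapping is used with gpc->lock held for
 * read, kvm_gpc_check() validates it, and kvm_gpc_refresh() (defined below
 * and declared in <linux/kvm_host.h>) re-establishes it on failure.  The
 * helper name and the u32 payload are hypothetical.
 */
#if 0
static int example_read_guest_u32(struct gfn_to_pfn_cache *gpc, u32 *val)
{
	unsigned long flags;

	read_lock_irqsave(&gpc->lock, flags);
	while (!kvm_gpc_check(gpc, sizeof(*val))) {
		read_unlock_irqrestore(&gpc->lock, flags);

		/* Refresh may sleep and must not be called under gpc->lock. */
		if (kvm_gpc_refresh(gpc, sizeof(*val)))
			return -EFAULT;

		read_lock_irqsave(&gpc->lock, flags);
	}

	*val = *(u32 *)gpc->khva;
	read_unlock_irqrestore(&gpc->lock, flags);

	return 0;
}
#endif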

static void *gpc_map(kvm_pfn_t pfn)
{
	if (pfn_valid(pfn))
		return kmap(pfn_to_page(pfn));

#ifdef CONFIG_HAS_IOMEM
	return memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
#else
	return NULL;
#endif
}

static void gpc_unmap(kvm_pfn_t pfn, void *khva)
{
	/* Unmap the old pfn/page if it was mapped before. */
	if (is_error_noslot_pfn(pfn) || !khva)
		return;

	if (pfn_valid(pfn)) {
		kunmap(pfn_to_page(pfn));
		return;
	}

#ifdef CONFIG_HAS_IOMEM
	memunmap(khva);
#endif
}

static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq)
{
	/*
	 * mn_active_invalidate_count acts for all intents and purposes
	 * like mmu_invalidate_in_progress here; but the latter cannot
	 * be used here because the invalidation of caches in the
	 * mmu_notifier event occurs _before_ mmu_invalidate_in_progress
	 * is elevated.
	 *
	 * Note, it does not matter that mn_active_invalidate_count
	 * is not protected by gpc->lock.  It is guaranteed to
	 * be elevated before the mmu_notifier acquires gpc->lock, and
	 * isn't dropped until after mmu_invalidate_seq is updated.
	 */
	if (kvm->mn_active_invalidate_count)
		return true;

	/*
	 * Ensure mn_active_invalidate_count is read before
	 * mmu_invalidate_seq.  This pairs with the smp_wmb() in
	 * mmu_notifier_invalidate_range_end() to guarantee either the
	 * old (non-zero) value of mn_active_invalidate_count or the
	 * new (incremented) value of mmu_invalidate_seq is observed.
	 */
	smp_rmb();
	return kvm->mmu_invalidate_seq != mmu_seq;
}

static int hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
{
	/* Note, the new page offset may be different than the old! */
	void *old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva);
	kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT;
	void *new_khva = NULL;
	unsigned long mmu_seq;

	lockdep_assert_held(&gpc->refresh_lock);

	lockdep_assert_held_write(&gpc->lock);

	/*
	 * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva
	 * assets have already been updated and so a concurrent check() from a
	 * different task may not fail the gpa/uhva/generation checks.
	 */
	gpc->valid = false;

	do {
		mmu_seq = gpc->kvm->mmu_invalidate_seq;
		smp_rmb();

		write_unlock_irq(&gpc->lock);

		/*
		 * If the previous iteration "failed" due to an mmu_notifier
		 * event, release the pfn and unmap the kernel virtual address
		 * from the previous attempt.  Unmapping might sleep, so this
		 * needs to be done after dropping the lock.  Opportunistically
		 * check for resched while the lock isn't held.
		 */
		if (new_pfn != KVM_PFN_ERR_FAULT) {
			/*
			 * Keep the mapping if the previous iteration reused
			 * the existing mapping and didn't create a new one.
			 */
			if (new_khva != old_khva)
				gpc_unmap(new_pfn, new_khva);

			kvm_release_pfn_clean(new_pfn);

			cond_resched();
		}

		/* We always request a writeable mapping */
		new_pfn = hva_to_pfn(gpc->uhva, false, false, NULL, true, NULL);
		if (is_error_noslot_pfn(new_pfn))
			goto out_error;

		/*
		 * Obtain a new kernel mapping if KVM itself will access the
		 * pfn.  Note, kmap() and memremap() can both sleep, so this
		 * too must be done outside of gpc->lock!
		 */
		if (new_pfn == gpc->pfn)
			new_khva = old_khva;
		else
			new_khva = gpc_map(new_pfn);

		if (!new_khva) {
			kvm_release_pfn_clean(new_pfn);
			goto out_error;
		}

		write_lock_irq(&gpc->lock);

		/*
		 * Other tasks must wait for _this_ refresh to complete before
		 * attempting to refresh.
		 */
		WARN_ON_ONCE(gpc->valid);
	} while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq));

	gpc->valid = true;
	gpc->pfn = new_pfn;
	gpc->khva = new_khva + offset_in_page(gpc->uhva);

	/*
	 * Put the reference to the _new_ pfn.  The pfn is now tracked by the
	 * cache and can be safely migrated, swapped, etc... as the cache will
	 * invalidate any mappings in response to relevant mmu_notifier events.
	 */
	kvm_release_pfn_clean(new_pfn);

	return 0;

out_error:
	write_lock_irq(&gpc->lock);

	return -EFAULT;
}

static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva)
{
	unsigned long page_offset;
	bool unmap_old = false;
	unsigned long old_uhva;
	kvm_pfn_t old_pfn;
	bool hva_change = false;
	void *old_khva;
	int ret;

	/* Either gpa or uhva must be valid, but not both */
	if (WARN_ON_ONCE(kvm_is_error_gpa(gpa) == kvm_is_error_hva(uhva)))
		return -EINVAL;

	lockdep_assert_held(&gpc->refresh_lock);

	write_lock_irq(&gpc->lock);

	if (!gpc->active) {
		ret = -EINVAL;
		goto out_unlock;
	}

	old_pfn = gpc->pfn;
	old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva);
	old_uhva = PAGE_ALIGN_DOWN(gpc->uhva);

	if (kvm_is_error_gpa(gpa)) {
		page_offset = offset_in_page(uhva);

		gpc->gpa = INVALID_GPA;
		gpc->memslot = NULL;
		gpc->uhva = PAGE_ALIGN_DOWN(uhva);

		if (gpc->uhva != old_uhva)
			hva_change = true;
	} else {
		struct kvm_memslots *slots = kvm_memslots(gpc->kvm);

		page_offset = offset_in_page(gpa);

		if (gpc->gpa != gpa || gpc->generation != slots->generation ||
		    kvm_is_error_hva(gpc->uhva)) {
			gfn_t gfn = gpa_to_gfn(gpa);

			gpc->gpa = gpa;
			gpc->generation = slots->generation;
			gpc->memslot = __gfn_to_memslot(slots, gfn);
			gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);

			if (kvm_is_error_hva(gpc->uhva)) {
				ret = -EFAULT;
				goto out;
			}

			/*
			 * Even if the GPA and/or the memslot generation changed, the
			 * HVA may still be the same.
			 */
			if (gpc->uhva != old_uhva)
				hva_change = true;
		} else {
			gpc->uhva = old_uhva;
		}
	}

	/* Note: the offset must be correct before calling hva_to_pfn_retry() */
	gpc->uhva += page_offset;

	/*
	 * If the userspace HVA changed or the PFN was already invalid,
	 * drop the lock and do the HVA to PFN lookup again.
	 */
	if (!gpc->valid || hva_change) {
		ret = hva_to_pfn_retry(gpc);
	} else {
		/*
		 * If the HVA→PFN mapping was already valid, don't unmap it.
		 * But do update gpc->khva because the offset within the page
		 * may have changed.
		 */
		gpc->khva = old_khva + page_offset;
		ret = 0;
		goto out_unlock;
	}

out:
	/*
	 * Invalidate the cache and purge the pfn/khva if the refresh failed.
	 * Some/all of the uhva, gpa, and memslot generation info may still be
	 * valid, leave it as is.
	 */
	if (ret) {
		gpc->valid = false;
		gpc->pfn = KVM_PFN_ERR_FAULT;
		gpc->khva = NULL;
	}

	/* Detect a pfn change before dropping the lock! */
	unmap_old = (old_pfn != gpc->pfn);

out_unlock:
	write_unlock_irq(&gpc->lock);

	if (unmap_old)
		gpc_unmap(old_pfn, old_khva);

	return ret;
}

int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len)
{
	unsigned long uhva;

	guard(mutex)(&gpc->refresh_lock);

	if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len))
		return -EINVAL;

	/*
	 * If the GPA is valid then ignore the HVA, as a cache can be GPA-based
	 * or HVA-based, not both.  For GPA-based caches, the HVA will be
	 * recomputed during refresh if necessary.
	 */
	uhva = kvm_is_error_gpa(gpc->gpa) ? gpc->uhva : KVM_HVA_ERR_BAD;

	return __kvm_gpc_refresh(gpc, gpc->gpa, uhva);
}

void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm)
{
	rwlock_init(&gpc->lock);
	mutex_init(&gpc->refresh_lock);

	gpc->kvm = kvm;
	gpc->pfn = KVM_PFN_ERR_FAULT;
	gpc->gpa = INVALID_GPA;
	gpc->uhva = KVM_HVA_ERR_BAD;
	gpc->active = gpc->valid = false;
}

static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva,
			      unsigned long len)
{
	struct kvm *kvm = gpc->kvm;

	if (!kvm_gpc_is_valid_len(gpa, uhva, len))
		return -EINVAL;

	guard(mutex)(&gpc->refresh_lock);

	if (!gpc->active) {
		if (KVM_BUG_ON(gpc->valid, kvm))
			return -EIO;

		spin_lock(&kvm->gpc_lock);
		list_add(&gpc->list, &kvm->gpc_list);
		spin_unlock(&kvm->gpc_lock);

		/*
		 * Activate the cache after adding it to the list, a concurrent
		 * refresh must not establish a mapping until the cache is
		 * reachable by mmu_notifier events.
		 */
		write_lock_irq(&gpc->lock);
		gpc->active = true;
		write_unlock_irq(&gpc->lock);
	}
	return __kvm_gpc_refresh(gpc, gpa, uhva);
}

int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
{
	/*
	 * Explicitly disallow INVALID_GPA so that the magic value can be used
	 * by KVM to differentiate between GPA-based and HVA-based caches.
	 */
	if (WARN_ON_ONCE(kvm_is_error_gpa(gpa)))
		return -EINVAL;

	return __kvm_gpc_activate(gpc, gpa, KVM_HVA_ERR_BAD, len);
}

int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long uhva, unsigned long len)
{
	return __kvm_gpc_activate(gpc, INVALID_GPA, uhva, len);
}

void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
{
	struct kvm *kvm = gpc->kvm;
	kvm_pfn_t old_pfn;
	void *old_khva;

	guard(mutex)(&gpc->refresh_lock);

	if (gpc->active) {
		/*
		 * Deactivate the cache before removing it from the list, KVM
		 * must stall mmu_notifier events until all users go away, i.e.
		 * until gpc->lock is dropped and refresh is guaranteed to fail.
		 */
		write_lock_irq(&gpc->lock);
		gpc->active = false;
		gpc->valid = false;

		/*
		 * Leave the GPA => uHVA cache intact, it's protected by the
		 * memslot generation.  The PFN lookup needs to be redone every
		 * time as mmu_notifier protection is lost when the cache is
		 * removed from the VM's gpc_list.
		 */
		old_khva = gpc->khva - offset_in_page(gpc->khva);
		gpc->khva = NULL;

		old_pfn = gpc->pfn;
		gpc->pfn = KVM_PFN_ERR_FAULT;
		write_unlock_irq(&gpc->lock);

		spin_lock(&kvm->gpc_lock);
		list_del(&gpc->list);
		spin_unlock(&kvm->gpc_lock);

		gpc_unmap(old_pfn, old_khva);
	}
}
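
/*
 * Illustrative sketch, not part of this file: the expected lifecycle of a
 * cache from a VM-scoped consumer, using only the public helpers implemented
 * above (kvm_gpc_init/activate/activate_hva/deactivate).  The function names
 * and the 8-byte payload size are hypothetical.
 */
#if 0
static int example_setup_cache(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
			       gpa_t gpa)
{
	/* One-time initialisation, before the cache is ever activated. */
	kvm_gpc_init(gpc, kvm);

	/*
	 * Activate and map the region at @gpa; the cached access (here an
	 * 8-byte value) must fit within a single page.  An HVA-based cache
	 * would call kvm_gpc_activate_hva() instead.
	 */
	return kvm_gpc_activate(gpc, gpa, sizeof(u64));
}

static void example_teardown_cache(struct gfn_to_pfn_cache *gpc)
{
	/*
	 * Unmap the page and take the cache off the VM's gpc_list; subsequent
	 * kvm_gpc_check()/kvm_gpc_refresh() calls will fail until the cache
	 * is activated again.
	 */
	kvm_gpc_deactivate(gpc);
}
#endif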