Commit | Line | Data |
---|---|---|
d2912cb1 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
9e04ba69 | 2 | /* |
9e04ba69 PM |
3 | * |
4 | * Copyright 2016 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | |
5 | */ | |
6 | ||
7 | #include <linux/types.h> | |
8 | #include <linux/string.h> | |
9 | #include <linux/kvm.h> | |
10 | #include <linux/kvm_host.h> | |
9a94d3ee PM |
11 | #include <linux/anon_inodes.h> |
12 | #include <linux/file.h> | |
13 | #include <linux/debugfs.h> | |
65fddcfc | 14 | #include <linux/pgtable.h> |
9e04ba69 PM |
15 | |
16 | #include <asm/kvm_ppc.h> | |
17 | #include <asm/kvm_book3s.h> | |
ebc88ea7 | 18 | #include "book3s_hv.h" |
9e04ba69 PM |
19 | #include <asm/page.h> |
20 | #include <asm/mmu.h> | |
9e04ba69 | 21 | #include <asm/pgalloc.h> |
94171b19 | 22 | #include <asm/pte-walk.h> |
008e359c BR |
23 | #include <asm/ultravisor.h> |
24 | #include <asm/kvm_book3s_uvmem.h> | |
81468083 | 25 | #include <asm/plpar_wrappers.h> |
46d60bdb | 26 | #include <asm/firmware.h> |
9e04ba69 PM |
27 | |
/*
 * Supported radix tree geometry.
 * Like p9, we support either 5 or 9 bits at the first (lowest) level,
 * for a page size of 64k or 4k.
 *
 * The array is indexed by tree level.  The guest tree walker in this file
 * only consults entries 1..3; level 0 is checked separately against the
 * two permitted values (5 or 9), so entry 0 is not used by that walker.
 */
static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
34 | ||
/*
 * Copy @n bytes between a host buffer and a guest effective address.
 *
 * @lpid:  partition id to switch to for the duration of the access
 * @pid:   process id to switch to; 0 selects quadrant 2 instead of quadrant 1
 * @eaddr: guest effective address; the top 12 bits must already be clear
 * @to:    host destination buffer when loading from the guest, else NULL
 * @from:  host source buffer when storing to the guest, else NULL
 * @n:     number of bytes to copy
 *
 * The direction is chosen solely by whether @to is non-NULL: a non-NULL @to
 * means "load from guest" and @from is then replaced by the quadrant address.
 *
 * Return values, by path:
 *  - H_UNSUPPORTED when kvmhv_is_nestedv2() is true;
 *  - the H_COPY_TOFROM_GUEST hcall result when running on pseries;
 *  - @n unchanged when @eaddr has any of its top 12 bits set;
 *  - otherwise whatever __copy_{from,to}_user_inatomic() returns
 *    (presumably the number of bytes NOT copied -- confirm against uaccess).
 */
unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
					      gva_t eaddr, void *to, void *from,
					      unsigned long n)
{
	int old_pid, old_lpid;
	unsigned long quadrant, ret = n;
	bool is_load = !!to;

	if (kvmhv_is_nestedv2())
		return H_UNSUPPORTED;

	/* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */
	if (kvmhv_on_pseries())
		return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr,
					  (to != NULL) ? __pa(to): 0,
					  (from != NULL) ? __pa(from): 0, n);

	/* The quadrant bits are OR-ed in below, so they must be clear here */
	if (eaddr & (0xFFFUL << 52))
		return ret;

	quadrant = 1;
	if (!pid)
		quadrant = 2;
	/* Replace the guest-side pointer with the quadrant-tagged address */
	if (is_load)
		from = (void *) (eaddr | (quadrant << 62));
	else
		to = (void *) (eaddr | (quadrant << 62));

	/* LPID/PID are changed on this CPU; stay on it until they are restored */
	preempt_disable();

	asm volatile("hwsync" ::: "memory");
	isync();
	/* switch the lpid first to avoid running host with unallocated pid */
	old_lpid = mfspr(SPRN_LPID);
	if (old_lpid != lpid)
		mtspr(SPRN_LPID, lpid);
	/* old_pid is only read and written on the quadrant 1 path */
	if (quadrant == 1) {
		old_pid = mfspr(SPRN_PID);
		if (old_pid != pid)
			mtspr(SPRN_PID, pid);
	}
	isync();

	/* The copy runs with page faults disabled, using the _inatomic helpers */
	pagefault_disable();
	if (is_load)
		ret = __copy_from_user_inatomic(to, (const void __user *)from, n);
	else
		ret = __copy_to_user_inatomic((void __user *)to, from, n);
	pagefault_enable();

	asm volatile("hwsync" ::: "memory");
	isync();
	/* switch the pid first to avoid running host with unallocated pid */
	if (quadrant == 1 && pid != old_pid)
		mtspr(SPRN_PID, old_pid);
	if (lpid != old_lpid)
		mtspr(SPRN_LPID, old_lpid);
	isync();

	preempt_enable();

	return ret;
}
98 | ||
99 | static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, | |
100 | void *to, void *from, unsigned long n) | |
101 | { | |
102 | int lpid = vcpu->kvm->arch.lpid; | |
e678748a | 103 | int pid; |
d7b45615 SJS |
104 | |
105 | /* This would cause a data segment intr so don't allow the access */ | |
106 | if (eaddr & (0x3FFUL << 52)) | |
107 | return -EINVAL; | |
108 | ||
109 | /* Should we be using the nested lpid */ | |
110 | if (vcpu->arch.nested) | |
111 | lpid = vcpu->arch.nested->shadow_lpid; | |
112 | ||
113 | /* If accessing quadrant 3 then pid is expected to be 0 */ | |
114 | if (((eaddr >> 62) & 0x3) == 0x3) | |
115 | pid = 0; | |
e678748a JN |
116 | else |
117 | pid = kvmppc_get_pid(vcpu); | |
d7b45615 SJS |
118 | |
119 | eaddr &= ~(0xFFFUL << 52); | |
120 | ||
121 | return __kvmhv_copy_tofrom_guest_radix(lpid, pid, eaddr, to, from, n); | |
122 | } | |
123 | ||
124 | long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, | |
125 | unsigned long n) | |
126 | { | |
127 | long ret; | |
128 | ||
129 | ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n); | |
130 | if (ret > 0) | |
131 | memset(to + (n - ret), 0, ret); | |
132 | ||
133 | return ret; | |
134 | } | |
d7b45615 SJS |
135 | |
136 | long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from, | |
137 | unsigned long n) | |
138 | { | |
139 | return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n); | |
140 | } | |
d7b45615 | 141 | |
/*
 * Walk a radix tree held in guest memory to translate @eaddr.
 *
 * @vcpu:      vcpu whose VM memory is read (under the vcpu SRCU read lock)
 * @eaddr:     address to translate
 * @gpte:      filled in on success (page size/shift, eaddr, raddr,
 *             read/write/execute permission and R/C bits)
 * @root:      root descriptor: encodes the tree size (RTS), the root table
 *             base (RPDB) and the root table index size (RPDS)
 * @pte_ret_p: optional; on success receives the leaf PTE, and when a
 *             guest-memory read fails it receives the address that
 *             could not be read
 *
 * Returns 0 on success, -EINVAL for an unsupported geometry, a misaligned
 * table base or leaf, or a leaf at an unsupported level, -ENOENT when an
 * entry is not present, or the error from kvm_read_guest().
 */
int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
			       struct kvmppc_pte *gpte, u64 root,
			       u64 *pte_ret_p)
{
	struct kvm *kvm = vcpu->kvm;
	int ret, level, ps;
	unsigned long rts, bits, offset, index;
	u64 pte, base, gpa;
	__be64 rpte;

	/* RTS is split across two fields of the root descriptor */
	rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
		((root & RTS2_MASK) >> RTS2_SHIFT);
	bits = root & RPDS_MASK;
	base = root & RPDB_MASK;

	offset = rts + 31;

	/* Current implementations only support 52-bit space */
	if (offset != 52)
		return -EINVAL;

	/* Walk each level of the radix tree */
	for (level = 3; level >= 0; --level) {
		u64 addr;
		/* Check a valid size */
		if (level && bits != p9_supported_radix_bits[level])
			return -EINVAL;
		if (level == 0 && !(bits == 5 || bits == 9))
			return -EINVAL;
		/* offset shrinks by this level's index width at each step */
		offset -= bits;
		index = (eaddr >> offset) & ((1UL << bits) - 1);
		/* Check that low bits of page table base are zero */
		if (base & ((1UL << (bits + 3)) - 1))
			return -EINVAL;
		/* Read the entry from guest memory */
		addr = base + (index * sizeof(rpte));

		kvm_vcpu_srcu_read_lock(vcpu);
		ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
		kvm_vcpu_srcu_read_unlock(vcpu);
		if (ret) {
			/* Report which guest address could not be read */
			if (pte_ret_p)
				*pte_ret_p = addr;
			return ret;
		}
		pte = __be64_to_cpu(rpte);
		if (!(pte & _PAGE_PRESENT))
			return -ENOENT;
		/* Check if a leaf entry */
		if (pte & _PAGE_PTE)
			break;
		/* Get ready to walk the next level */
		base = pte & RPDB_MASK;
		bits = pte & RPDS_MASK;
	}

	/* Need a leaf at lowest level; 512GB pages not supported */
	if (level < 0 || level == 3)
		return -EINVAL;

	/* We found a valid leaf PTE */
	/* Offset is now log base 2 of the page size */
	gpa = pte & 0x01fffffffffff000ul;
	/* The real address in the leaf must be aligned to the page size */
	if (gpa & ((1ul << offset) - 1))
		return -EINVAL;
	gpa |= eaddr & ((1ul << offset) - 1);
	/* ps ends up as MMU_PAGE_COUNT if no known page size matches */
	for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps)
		if (offset == mmu_psize_defs[ps].shift)
			break;
	gpte->page_size = ps;
	gpte->page_shift = offset;

	gpte->eaddr = eaddr;
	gpte->raddr = gpa;

	/* Work out permissions */
	gpte->may_read = !!(pte & _PAGE_READ);
	gpte->may_write = !!(pte & _PAGE_WRITE);
	gpte->may_execute = !!(pte & _PAGE_EXEC);

	gpte->rc = pte & (_PAGE_ACCESSED | _PAGE_DIRTY);

	if (pte_ret_p)
		*pte_ret_p = pte;

	return 0;
}
229 | ||
fd10be25 SJS |
230 | /* |
231 | * Used to walk a partition or process table radix tree in guest memory | |
232 | * Note: We exploit the fact that a partition table and a process | |
233 | * table have the same layout, a partition-scoped page table and a | |
234 | * process-scoped page table have the same layout, and the 2nd | |
235 | * doubleword of a partition table entry has the same layout as | |
236 | * the PTCR register. | |
237 | */ | |
238 | int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr, | |
239 | struct kvmppc_pte *gpte, u64 table, | |
240 | int table_index, u64 *pte_ret_p) | |
241 | { | |
242 | struct kvm *kvm = vcpu->kvm; | |
243 | int ret; | |
244 | unsigned long size, ptbl, root; | |
245 | struct prtb_entry entry; | |
246 | ||
247 | if ((table & PRTS_MASK) > 24) | |
248 | return -EINVAL; | |
249 | size = 1ul << ((table & PRTS_MASK) + 12); | |
250 | ||
251 | /* Is the table big enough to contain this entry? */ | |
252 | if ((table_index * sizeof(entry)) >= size) | |
253 | return -EINVAL; | |
254 | ||
255 | /* Read the table to find the root of the radix tree */ | |
256 | ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry)); | |
2031f287 | 257 | kvm_vcpu_srcu_read_lock(vcpu); |
fd10be25 | 258 | ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry)); |
2031f287 | 259 | kvm_vcpu_srcu_read_unlock(vcpu); |
fd10be25 SJS |
260 | if (ret) |
261 | return ret; | |
262 | ||
263 | /* Root is stored in the first double word */ | |
264 | root = be64_to_cpu(entry.prtb0); | |
265 | ||
266 | return kvmppc_mmu_walk_radix_tree(vcpu, eaddr, gpte, root, pte_ret_p); | |
267 | } | |
268 | ||
9811c78e SJS |
269 | int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, |
270 | struct kvmppc_pte *gpte, bool data, bool iswrite) | |
271 | { | |
272 | u32 pid; | |
273 | u64 pte; | |
274 | int ret; | |
275 | ||
276 | /* Work out effective PID */ | |
277 | switch (eaddr >> 62) { | |
278 | case 0: | |
7028ac8d | 279 | pid = kvmppc_get_pid(vcpu); |
9811c78e SJS |
280 | break; |
281 | case 3: | |
282 | pid = 0; | |
283 | break; | |
284 | default: | |
285 | return -EINVAL; | |
286 | } | |
287 | ||
288 | ret = kvmppc_mmu_radix_translate_table(vcpu, eaddr, gpte, | |
289 | vcpu->kvm->arch.process_table, pid, &pte); | |
290 | if (ret) | |
291 | return ret; | |
292 | ||
293 | /* Check privilege (applies only to process scoped translations) */ | |
9e04ba69 PM |
294 | if (kvmppc_get_msr(vcpu) & MSR_PR) { |
295 | if (pte & _PAGE_PRIVILEGED) { | |
296 | gpte->may_read = 0; | |
297 | gpte->may_write = 0; | |
298 | gpte->may_execute = 0; | |
299 | } | |
300 | } else { | |
301 | if (!(pte & _PAGE_PRIVILEGED)) { | |
302 | /* Check AMR/IAMR to see if strict mode is in force */ | |
ebc88ea7 | 303 | if (kvmppc_get_amr_hv(vcpu) & (1ul << 62)) |
9e04ba69 | 304 | gpte->may_read = 0; |
ebc88ea7 | 305 | if (kvmppc_get_amr_hv(vcpu) & (1ul << 63)) |
9e04ba69 PM |
306 | gpte->may_write = 0; |
307 | if (vcpu->arch.iamr & (1ul << 62)) | |
308 | gpte->may_execute = 0; | |
309 | } | |
310 | } | |
311 | ||
312 | return 0; | |
313 | } | |
314 | ||
90165d3d | 315 | void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, |
dfcaacc8 | 316 | unsigned int pshift, u64 lpid) |
5a319350 | 317 | { |
d91cb39f | 318 | unsigned long psize = PAGE_SIZE; |
690ed4ca PM |
319 | int psi; |
320 | long rc; | |
321 | unsigned long rb; | |
d91cb39f NP |
322 | |
323 | if (pshift) | |
324 | psize = 1UL << pshift; | |
690ed4ca PM |
325 | else |
326 | pshift = PAGE_SHIFT; | |
d91cb39f NP |
327 | |
328 | addr &= ~(psize - 1); | |
690ed4ca PM |
329 | |
330 | if (!kvmhv_on_pseries()) { | |
331 | radix__flush_tlb_lpid_page(lpid, addr, psize); | |
332 | return; | |
333 | } | |
334 | ||
335 | psi = shift_to_mmu_psize(pshift); | |
81468083 BR |
336 | |
337 | if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) { | |
338 | rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58)); | |
339 | rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1), | |
340 | lpid, rb); | |
341 | } else { | |
342 | rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU, | |
343 | H_RPTI_TYPE_NESTED | | |
344 | H_RPTI_TYPE_TLB, | |
345 | psize_to_rpti_pgsize(psi), | |
346 | addr, addr + psize); | |
347 | } | |
348 | ||
690ed4ca PM |
349 | if (rc) |
350 | pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc); | |
5a319350 PM |
351 | } |
352 | ||
dfcaacc8 | 353 | static void kvmppc_radix_flush_pwc(struct kvm *kvm, u64 lpid) |
c4c8a764 | 354 | { |
690ed4ca PM |
355 | long rc; |
356 | ||
357 | if (!kvmhv_on_pseries()) { | |
358 | radix__flush_pwc_lpid(lpid); | |
359 | return; | |
360 | } | |
361 | ||
81468083 BR |
362 | if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) |
363 | rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1), | |
364 | lpid, TLBIEL_INVAL_SET_LPID); | |
365 | else | |
366 | rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU, | |
367 | H_RPTI_TYPE_NESTED | | |
368 | H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL, | |
369 | 0, -1UL); | |
690ed4ca PM |
370 | if (rc) |
371 | pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc); | |
c4c8a764 PM |
372 | } |
373 | ||
878cf2bb | 374 | static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, |
8f7b79b8 PM |
375 | unsigned long clr, unsigned long set, |
376 | unsigned long addr, unsigned int shift) | |
5a319350 | 377 | { |
2bf1071a | 378 | return __radix_pte_update(ptep, clr, set); |
5a319350 PM |
379 | } |
380 | ||
cf59eb13 | 381 | static void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr, |
5a319350 PM |
382 | pte_t *ptep, pte_t pte) |
383 | { | |
384 | radix__set_pte_at(kvm->mm, addr, ptep, pte, 0); | |
385 | } | |
386 | ||
/* Slab caches backing kvmppc_pte_alloc()/free() and kvmppc_pmd_alloc()/free() */
static struct kmem_cache *kvm_pte_cache;
static struct kmem_cache *kvm_pmd_cache;
5a319350 PM |
389 | |
390 | static pte_t *kvmppc_pte_alloc(void) | |
391 | { | |
0aca8a55 QC |
392 | pte_t *pte; |
393 | ||
394 | pte = kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL); | |
395 | /* pmd_populate() will only reference _pa(pte). */ | |
396 | kmemleak_ignore(pte); | |
397 | ||
398 | return pte; | |
5a319350 PM |
399 | } |
400 | ||
401 | static void kvmppc_pte_free(pte_t *ptep) | |
402 | { | |
403 | kmem_cache_free(kvm_pte_cache, ptep); | |
404 | } | |
405 | ||
21828c99 AK |
406 | static pmd_t *kvmppc_pmd_alloc(void) |
407 | { | |
0aca8a55 QC |
408 | pmd_t *pmd; |
409 | ||
410 | pmd = kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); | |
411 | /* pud_populate() will only reference _pa(pmd). */ | |
412 | kmemleak_ignore(pmd); | |
413 | ||
414 | return pmd; | |
21828c99 AK |
415 | } |
416 | ||
417 | static void kvmppc_pmd_free(pmd_t *pmdp) | |
418 | { | |
419 | kmem_cache_free(kvm_pmd_cache, pmdp); | |
420 | } | |
421 | ||
8cf531ed SJS |
422 | /* Called with kvm->mmu_lock held */ |
423 | void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, | |
c43c3a86 PM |
424 | unsigned int shift, |
425 | const struct kvm_memory_slot *memslot, | |
dfcaacc8 | 426 | u64 lpid) |
a5fad1e9 NP |
427 | |
428 | { | |
a5fad1e9 | 429 | unsigned long old; |
8cf531ed SJS |
430 | unsigned long gfn = gpa >> PAGE_SHIFT; |
431 | unsigned long page_size = PAGE_SIZE; | |
432 | unsigned long hpa; | |
a5fad1e9 NP |
433 | |
434 | old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift); | |
fd10be25 | 435 | kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid); |
a5fad1e9 | 436 | |
8cf531ed SJS |
437 | /* The following only applies to L1 entries */ |
438 | if (lpid != kvm->arch.lpid) | |
439 | return; | |
a5fad1e9 | 440 | |
8cf531ed | 441 | if (!memslot) { |
a5fad1e9 | 442 | memslot = gfn_to_memslot(kvm, gfn); |
f0f825f0 | 443 | if (!memslot) |
8cf531ed | 444 | return; |
a5fad1e9 | 445 | } |
8f1f7b9b | 446 | if (shift) { /* 1GB or 2MB page */ |
8cf531ed | 447 | page_size = 1ul << shift; |
8f1f7b9b SJS |
448 | if (shift == PMD_SHIFT) |
449 | kvm->stat.num_2M_pages--; | |
450 | else if (shift == PUD_SHIFT) | |
451 | kvm->stat.num_1G_pages--; | |
452 | } | |
8cf531ed SJS |
453 | |
454 | gpa &= ~(page_size - 1); | |
455 | hpa = old & PTE_RPN_MASK; | |
456 | kvmhv_remove_nest_rmap_range(kvm, memslot, gpa, hpa, page_size); | |
457 | ||
458 | if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) | |
459 | kvmppc_update_dirty_map(memslot, gfn, page_size); | |
a5fad1e9 NP |
460 | } |
461 | ||
a5704e83 NP |
462 | /* |
463 | * kvmppc_free_p?d are used to free existing page tables, and recursively | |
464 | * descend and clear and free children. | |
465 | * Callers are responsible for flushing the PWC. | |
466 | * | |
467 | * When page tables are being unmapped/freed as part of page fault path | |
3d89c2ef PM |
468 | * (full == false), valid ptes are generally not expected; however, there |
469 | * is one situation where they arise, which is when dirty page logging is | |
470 | * turned off for a memslot while the VM is running. The new memslot | |
471 | * becomes visible to page faults before the memslot commit function | |
472 | * gets to flush the memslot, which can lead to a 2MB page mapping being | |
473 | * installed for a guest physical address where there are already 64kB | |
474 | * (or 4kB) mappings (of sub-pages of the same 2MB page). | |
a5704e83 | 475 | */ |
fd10be25 | 476 | static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full, |
dfcaacc8 | 477 | u64 lpid) |
a5704e83 NP |
478 | { |
479 | if (full) { | |
afd31356 | 480 | memset(pte, 0, sizeof(long) << RADIX_PTE_INDEX_SIZE); |
a5704e83 NP |
481 | } else { |
482 | pte_t *p = pte; | |
483 | unsigned long it; | |
484 | ||
485 | for (it = 0; it < PTRS_PER_PTE; ++it, ++p) { | |
486 | if (pte_val(*p) == 0) | |
487 | continue; | |
a5704e83 NP |
488 | kvmppc_unmap_pte(kvm, p, |
489 | pte_pfn(*p) << PAGE_SHIFT, | |
fd10be25 | 490 | PAGE_SHIFT, NULL, lpid); |
a5704e83 NP |
491 | } |
492 | } | |
493 | ||
494 | kvmppc_pte_free(pte); | |
495 | } | |
496 | ||
fd10be25 | 497 | static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full, |
dfcaacc8 | 498 | u64 lpid) |
a5704e83 NP |
499 | { |
500 | unsigned long im; | |
501 | pmd_t *p = pmd; | |
502 | ||
503 | for (im = 0; im < PTRS_PER_PMD; ++im, ++p) { | |
504 | if (!pmd_present(*p)) | |
505 | continue; | |
506 | if (pmd_is_leaf(*p)) { | |
507 | if (full) { | |
508 | pmd_clear(p); | |
509 | } else { | |
510 | WARN_ON_ONCE(1); | |
511 | kvmppc_unmap_pte(kvm, (pte_t *)p, | |
512 | pte_pfn(*(pte_t *)p) << PAGE_SHIFT, | |
fd10be25 | 513 | PMD_SHIFT, NULL, lpid); |
a5704e83 NP |
514 | } |
515 | } else { | |
516 | pte_t *pte; | |
517 | ||
d00ae31f | 518 | pte = pte_offset_kernel(p, 0); |
fd10be25 | 519 | kvmppc_unmap_free_pte(kvm, pte, full, lpid); |
a5704e83 NP |
520 | pmd_clear(p); |
521 | } | |
522 | } | |
523 | kvmppc_pmd_free(pmd); | |
524 | } | |
525 | ||
fd10be25 | 526 | static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud, |
dfcaacc8 | 527 | u64 lpid) |
a5704e83 NP |
528 | { |
529 | unsigned long iu; | |
530 | pud_t *p = pud; | |
531 | ||
532 | for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) { | |
533 | if (!pud_present(*p)) | |
534 | continue; | |
d6eacedd | 535 | if (pud_is_leaf(*p)) { |
a5704e83 NP |
536 | pud_clear(p); |
537 | } else { | |
538 | pmd_t *pmd; | |
539 | ||
540 | pmd = pmd_offset(p, 0); | |
fd10be25 | 541 | kvmppc_unmap_free_pmd(kvm, pmd, true, lpid); |
a5704e83 NP |
542 | pud_clear(p); |
543 | } | |
544 | } | |
545 | pud_free(kvm->mm, pud); | |
546 | } | |
547 | ||
dfcaacc8 | 548 | void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, u64 lpid) |
a5704e83 NP |
549 | { |
550 | unsigned long ig; | |
a5704e83 | 551 | |
a5704e83 | 552 | for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { |
2fb47060 | 553 | p4d_t *p4d = p4d_offset(pgd, 0); |
a5704e83 NP |
554 | pud_t *pud; |
555 | ||
2fb47060 | 556 | if (!p4d_present(*p4d)) |
a5704e83 | 557 | continue; |
2fb47060 | 558 | pud = pud_offset(p4d, 0); |
fd10be25 | 559 | kvmppc_unmap_free_pud(kvm, pud, lpid); |
2fb47060 | 560 | p4d_clear(p4d); |
a5704e83 | 561 | } |
fd10be25 SJS |
562 | } |
563 | ||
564 | void kvmppc_free_radix(struct kvm *kvm) | |
565 | { | |
566 | if (kvm->arch.pgtable) { | |
567 | kvmppc_free_pgtable_radix(kvm, kvm->arch.pgtable, | |
568 | kvm->arch.lpid); | |
569 | pgd_free(kvm->mm, kvm->arch.pgtable); | |
570 | kvm->arch.pgtable = NULL; | |
571 | } | |
a5704e83 NP |
572 | } |
573 | ||
574 | static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd, | |
dfcaacc8 | 575 | unsigned long gpa, u64 lpid) |
a5704e83 NP |
576 | { |
577 | pte_t *pte = pte_offset_kernel(pmd, 0); | |
578 | ||
579 | /* | |
580 | * Clearing the pmd entry then flushing the PWC ensures that the pte | |
581 | * page no longer be cached by the MMU, so can be freed without | |
582 | * flushing the PWC again. | |
583 | */ | |
584 | pmd_clear(pmd); | |
fd10be25 | 585 | kvmppc_radix_flush_pwc(kvm, lpid); |
a5704e83 | 586 | |
fd10be25 | 587 | kvmppc_unmap_free_pte(kvm, pte, false, lpid); |
a5704e83 NP |
588 | } |
589 | ||
590 | static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud, | |
dfcaacc8 | 591 | unsigned long gpa, u64 lpid) |
a5704e83 NP |
592 | { |
593 | pmd_t *pmd = pmd_offset(pud, 0); | |
594 | ||
595 | /* | |
596 | * Clearing the pud entry then flushing the PWC ensures that the pmd | |
597 | * page and any children pte pages will no longer be cached by the MMU, | |
598 | * so can be freed without flushing the PWC again. | |
599 | */ | |
600 | pud_clear(pud); | |
fd10be25 | 601 | kvmppc_radix_flush_pwc(kvm, lpid); |
a5704e83 | 602 | |
fd10be25 | 603 | kvmppc_unmap_free_pmd(kvm, pmd, false, lpid); |
a5704e83 NP |
604 | } |
605 | ||
/*
 * There are a number of bits which may differ between different faults to
 * the same partition scope entry: the RC bits, in the course of cleaning
 * and aging, and the write bit, which can change either because the access
 * was upgraded or because a read fault happened concurrently with a write
 * fault that set those bits first.
 *
 * Every other bit of an existing entry is expected to match the entry
 * being installed; the mask below selects those bits.
 */
#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))
614 | ||
/*
 * Install @pte for guest physical address @gpa in the radix tree @pgtable.
 *
 * @kvm:     VM owning the tree; its mmu_lock is taken while the tree changes
 * @pgtable: top-level table of the tree to modify
 * @pte:     entry to install
 * @gpa:     guest physical address being mapped
 * @level:   2 installs the entry at the PUD level, 1 at the PMD level,
 *           anything else at the PTE level
 * @mmu_seq: invalidate sequence sampled by the caller, used to detect a
 *           concurrent invalidation
 * @lpid:    partition id used for TLB/PWC invalidation of replaced entries
 * @rmapp, @n_rmap: when both are non-NULL, a nested rmap entry is inserted
 *           after a new entry is written
 *
 * Missing intermediate tables are allocated before the lock is taken and
 * the tree is then re-examined under the lock; unused allocations are
 * freed on the way out.
 *
 * Returns 0 on success (including when an equivalent entry already exists
 * or only extra bits had to be merged in), -EAGAIN if an invalidation is in
 * progress or a racing CPU installed a large page where a table was
 * expected, and -ENOMEM if a required table was not allocated.
 */
int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
		      unsigned long gpa, unsigned int level,
		      unsigned long mmu_seq, u64 lpid,
		      unsigned long *rmapp, struct rmap_nested **n_rmap)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud, *new_pud = NULL;
	pmd_t *pmd, *new_pmd = NULL;
	pte_t *ptep, *new_ptep = NULL;
	int ret;

	/* Traverse the guest's 2nd-level tree, allocate new levels needed */
	pgd = pgtable + pgd_index(gpa);
	p4d = p4d_offset(pgd, gpa);

	pud = NULL;
	if (p4d_present(*p4d))
		pud = pud_offset(p4d, gpa);
	else
		new_pud = pud_alloc_one(kvm->mm, gpa);

	pmd = NULL;
	if (pud && pud_present(*pud) && !pud_is_leaf(*pud))
		pmd = pmd_offset(pud, gpa);
	else if (level <= 1)
		new_pmd = kvmppc_pmd_alloc();

	if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
		new_ptep = kvmppc_pte_alloc();

	/* Check if we might have been invalidated; let the guest retry if so */
	spin_lock(&kvm->mmu_lock);
	ret = -EAGAIN;
	if (mmu_invalidate_retry(kvm, mmu_seq))
		goto out_unlock;

	/* Now traverse again under the lock and change the tree */
	ret = -ENOMEM;
	if (p4d_none(*p4d)) {
		if (!new_pud)
			goto out_unlock;
		p4d_populate(kvm->mm, p4d, new_pud);
		/* Ownership moved into the tree; don't free it on exit */
		new_pud = NULL;
	}
	pud = pud_offset(p4d, gpa);
	if (pud_is_leaf(*pud)) {
		unsigned long hgpa = gpa & PUD_MASK;

		/* Check if we raced and someone else has set the same thing */
		if (level == 2) {
			if (pud_raw(*pud) == pte_raw(pte)) {
				ret = 0;
				goto out_unlock;
			}
			/* Valid 1GB page here already, add our extra bits */
			WARN_ON_ONCE((pud_val(*pud) ^ pte_val(pte)) &
							PTE_BITS_MUST_MATCH);
			kvmppc_radix_update_pte(kvm, (pte_t *)pud,
					      0, pte_val(pte), hgpa, PUD_SHIFT);
			ret = 0;
			goto out_unlock;
		}
		/*
		 * If we raced with another CPU which has just put
		 * a 1GB pte in after we saw a pmd page, try again.
		 */
		if (!new_pmd) {
			ret = -EAGAIN;
			goto out_unlock;
		}
		/* Valid 1GB page here already, remove it */
		kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT, NULL,
				 lpid);
	}
	if (level == 2) {
		if (!pud_none(*pud)) {
			/*
			 * There's a page table page here, but we wanted to
			 * install a large page, so remove and free the page
			 * table page.
			 */
			kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa, lpid);
		}
		kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
		if (rmapp && n_rmap)
			kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
		ret = 0;
		goto out_unlock;
	}
	if (pud_none(*pud)) {
		if (!new_pmd)
			goto out_unlock;
		pud_populate(kvm->mm, pud, new_pmd);
		/* Ownership moved into the tree; don't free it on exit */
		new_pmd = NULL;
	}
	pmd = pmd_offset(pud, gpa);
	if (pmd_is_leaf(*pmd)) {
		unsigned long lgpa = gpa & PMD_MASK;

		/* Check if we raced and someone else has set the same thing */
		if (level == 1) {
			if (pmd_raw(*pmd) == pte_raw(pte)) {
				ret = 0;
				goto out_unlock;
			}
			/* Valid 2MB page here already, add our extra bits */
			WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) &
							PTE_BITS_MUST_MATCH);
			kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
					0, pte_val(pte), lgpa, PMD_SHIFT);
			ret = 0;
			goto out_unlock;
		}

		/*
		 * If we raced with another CPU which has just put
		 * a 2MB pte in after we saw a pte page, try again.
		 */
		if (!new_ptep) {
			ret = -EAGAIN;
			goto out_unlock;
		}
		/* Valid 2MB page here already, remove it */
		kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT, NULL,
				 lpid);
	}
	if (level == 1) {
		if (!pmd_none(*pmd)) {
			/*
			 * There's a page table page here, but we wanted to
			 * install a large page, so remove and free the page
			 * table page.
			 */
			kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa, lpid);
		}
		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
		if (rmapp && n_rmap)
			kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
		ret = 0;
		goto out_unlock;
	}
	if (pmd_none(*pmd)) {
		if (!new_ptep)
			goto out_unlock;
		pmd_populate(kvm->mm, pmd, new_ptep);
		/* Ownership moved into the tree; don't free it on exit */
		new_ptep = NULL;
	}
	ptep = pte_offset_kernel(pmd, gpa);
	if (pte_present(*ptep)) {
		/* Check if someone else set the same thing */
		if (pte_raw(*ptep) == pte_raw(pte)) {
			ret = 0;
			goto out_unlock;
		}
		/* Valid page here already, add our extra bits */
		WARN_ON_ONCE((pte_val(*ptep) ^ pte_val(pte)) &
							PTE_BITS_MUST_MATCH);
		kvmppc_radix_update_pte(kvm, ptep, 0, pte_val(pte), gpa, 0);
		ret = 0;
		goto out_unlock;
	}
	kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
	if (rmapp && n_rmap)
		kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
	ret = 0;

 out_unlock:
	spin_unlock(&kvm->mmu_lock);
	/* Free any pre-allocated tables that were not linked into the tree */
	if (new_pud)
		pud_free(kvm->mm, new_pud);
	if (new_pmd)
		kvmppc_pmd_free(new_pmd);
	if (new_ptep)
		kvmppc_pte_free(new_ptep);
	return ret;
}
792 | ||
6cdf3037 | 793 | bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing, |
dfcaacc8 | 794 | unsigned long gpa, u64 lpid) |
04bae9d5 SJS |
795 | { |
796 | unsigned long pgflags; | |
797 | unsigned int shift; | |
798 | pte_t *ptep; | |
799 | ||
800 | /* | |
801 | * Need to set an R or C bit in the 2nd-level tables; | |
802 | * since we are just helping out the hardware here, | |
803 | * it is sufficient to do what the hardware does. | |
804 | */ | |
805 | pgflags = _PAGE_ACCESSED; | |
806 | if (writing) | |
807 | pgflags |= _PAGE_DIRTY; | |
6cdf3037 AK |
808 | |
809 | if (nested) | |
810 | ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift); | |
811 | else | |
812 | ptep = find_kvm_secondary_pte(kvm, gpa, &shift); | |
813 | ||
04bae9d5 SJS |
814 | if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) { |
815 | kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift); | |
816 | return true; | |
817 | } | |
818 | return false; | |
819 | } | |
820 | ||
fd10be25 SJS |
821 | int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, |
822 | unsigned long gpa, | |
823 | struct kvm_memory_slot *memslot, | |
824 | bool writing, bool kvm_ro, | |
825 | pte_t *inserted_pte, unsigned int *levelp) | |
5a319350 PM |
826 | { |
827 | struct kvm *kvm = vcpu->kvm; | |
31c8b0d0 | 828 | struct page *page = NULL; |
04bae9d5 SJS |
829 | unsigned long mmu_seq; |
830 | unsigned long hva, gfn = gpa >> PAGE_SHIFT; | |
31c8b0d0 PM |
831 | bool upgrade_write = false; |
832 | bool *upgrade_p = &upgrade_write; | |
5a319350 | 833 | pte_t pte, *ptep; |
5a319350 | 834 | unsigned int shift, level; |
04bae9d5 | 835 | int ret; |
f460f679 | 836 | bool large_enable; |
5a319350 | 837 | |
31c8b0d0 | 838 | /* used to check for invalidations in progress */ |
20ec3ebd | 839 | mmu_seq = kvm->mmu_invalidate_seq; |
31c8b0d0 PM |
840 | smp_rmb(); |
841 | ||
842 | /* | |
843 | * Do a fast check first, since __gfn_to_pfn_memslot doesn't | |
844 | * do it with !atomic && !async, which is how we call it. | |
845 | * We always ask for write permission since the common case | |
846 | * is that the page is writable. | |
847 | */ | |
848 | hva = gfn_to_hva_memslot(memslot, gfn); | |
dadbb612 | 849 | if (!kvm_ro && get_user_page_fast_only(hva, FOLL_WRITE, &page)) { |
31c8b0d0 PM |
850 | upgrade_write = true; |
851 | } else { | |
71d29f43 NP |
852 | unsigned long pfn; |
853 | ||
31c8b0d0 | 854 | /* Call KVM generic code to do the slow-path check */ |
c8b88b33 | 855 | pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL, |
4a42d848 | 856 | writing, upgrade_p, NULL); |
31c8b0d0 PM |
857 | if (is_error_noslot_pfn(pfn)) |
858 | return -EFAULT; | |
859 | page = NULL; | |
860 | if (pfn_valid(pfn)) { | |
861 | page = pfn_to_page(pfn); | |
862 | if (PageReserved(page)) | |
863 | page = NULL; | |
5a319350 | 864 | } |
31c8b0d0 PM |
865 | } |
866 | ||
5a319350 | 867 | /* |
71d29f43 NP |
868 | * Read the PTE from the process' radix tree and use that |
869 | * so we get the shift and attribute bits. | |
5a319350 | 870 | */ |
bda3deaa AK |
871 | spin_lock(&kvm->mmu_lock); |
872 | ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); | |
ae49deda PM |
873 | pte = __pte(0); |
874 | if (ptep) | |
bda3deaa AK |
875 | pte = READ_ONCE(*ptep); |
876 | spin_unlock(&kvm->mmu_lock); | |
6579804c PM |
877 | /* |
878 | * If the PTE disappeared temporarily due to a THP | |
879 | * collapse, just return and let the guest try again. | |
880 | */ | |
ae49deda | 881 | if (!pte_present(pte)) { |
6579804c PM |
882 | if (page) |
883 | put_page(page); | |
884 | return RESUME_GUEST; | |
885 | } | |
71d29f43 | 886 | |
f460f679 PM |
887 | /* If we're logging dirty pages, always map single pages */ |
888 | large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES); | |
889 | ||
71d29f43 | 890 | /* Get pte level from shift/size */ |
f460f679 | 891 | if (large_enable && shift == PUD_SHIFT && |
71d29f43 NP |
892 | (gpa & (PUD_SIZE - PAGE_SIZE)) == |
893 | (hva & (PUD_SIZE - PAGE_SIZE))) { | |
894 | level = 2; | |
f460f679 | 895 | } else if (large_enable && shift == PMD_SHIFT && |
71d29f43 NP |
896 | (gpa & (PMD_SIZE - PAGE_SIZE)) == |
897 | (hva & (PMD_SIZE - PAGE_SIZE))) { | |
898 | level = 1; | |
31c8b0d0 | 899 | } else { |
71d29f43 NP |
900 | level = 0; |
901 | if (shift > PAGE_SHIFT) { | |
902 | /* | |
903 | * If the pte maps more than one page, bring over | |
904 | * bits from the virtual address to get the real | |
905 | * address of the specific single page we want. | |
906 | */ | |
907 | unsigned long rpnmask = (1ul << shift) - PAGE_SIZE; | |
908 | pte = __pte(pte_val(pte) | (hva & rpnmask)); | |
bc64dd0e | 909 | } |
5a319350 | 910 | } |
5a319350 | 911 | |
71d29f43 NP |
912 | pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED); |
913 | if (writing || upgrade_write) { | |
914 | if (pte_val(pte) & _PAGE_WRITE) | |
915 | pte = __pte(pte_val(pte) | _PAGE_DIRTY); | |
916 | } else { | |
917 | pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY)); | |
918 | } | |
919 | ||
5a319350 | 920 | /* Allocate space in the tree and write the PTE */ |
04bae9d5 | 921 | ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level, |
8cf531ed | 922 | mmu_seq, kvm->arch.lpid, NULL, NULL); |
04bae9d5 SJS |
923 | if (inserted_pte) |
924 | *inserted_pte = pte; | |
925 | if (levelp) | |
926 | *levelp = level; | |
5a319350 PM |
927 | |
928 | if (page) { | |
31c8b0d0 | 929 | if (!ret && (pte_val(pte) & _PAGE_WRITE)) |
c3856aeb PM |
930 | set_page_dirty_lock(page); |
931 | put_page(page); | |
5a319350 | 932 | } |
c3856aeb | 933 | |
8f1f7b9b SJS |
934 | /* Increment number of large pages if we (successfully) inserted one */ |
935 | if (!ret) { | |
936 | if (level == 1) | |
937 | kvm->stat.num_2M_pages++; | |
938 | else if (level == 2) | |
939 | kvm->stat.num_1G_pages++; | |
940 | } | |
941 | ||
04bae9d5 SJS |
942 | return ret; |
943 | } | |
944 | ||
8c99d345 | 945 | int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu, |
04bae9d5 SJS |
946 | unsigned long ea, unsigned long dsisr) |
947 | { | |
948 | struct kvm *kvm = vcpu->kvm; | |
949 | unsigned long gpa, gfn; | |
950 | struct kvm_memory_slot *memslot; | |
951 | long ret; | |
952 | bool writing = !!(dsisr & DSISR_ISSTORE); | |
953 | bool kvm_ro = false; | |
954 | ||
955 | /* Check for unusual errors */ | |
956 | if (dsisr & DSISR_UNSUPP_MMU) { | |
957 | pr_err("KVM: Got unsupported MMU fault\n"); | |
958 | return -EFAULT; | |
959 | } | |
960 | if (dsisr & DSISR_BADACCESS) { | |
961 | /* Reflect to the guest as DSI */ | |
962 | pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr); | |
6cd5c1db NP |
963 | kvmppc_core_queue_data_storage(vcpu, |
964 | kvmppc_get_msr(vcpu) & SRR1_PREFIXED, | |
965 | ea, dsisr); | |
04bae9d5 SJS |
966 | return RESUME_GUEST; |
967 | } | |
968 | ||
969 | /* Translate the logical address */ | |
970 | gpa = vcpu->arch.fault_gpa & ~0xfffUL; | |
971 | gpa &= ~0xF000000000000000ul; | |
972 | gfn = gpa >> PAGE_SHIFT; | |
973 | if (!(dsisr & DSISR_PRTABLE_FAULT)) | |
974 | gpa |= ea & 0xfff; | |
975 | ||
008e359c BR |
976 | if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) |
977 | return kvmppc_send_page_to_uv(kvm, gfn); | |
978 | ||
04bae9d5 SJS |
979 | /* Get the corresponding memslot */ |
980 | memslot = gfn_to_memslot(kvm, gfn); | |
981 | ||
982 | /* No memslot means it's an emulated MMIO region */ | |
983 | if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { | |
984 | if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS | | |
985 | DSISR_SET_RC)) { | |
986 | /* | |
987 | * Bad address in guest page table tree, or other | |
988 | * unusual error - reflect it to the guest as DSI. | |
989 | */ | |
6cd5c1db NP |
990 | kvmppc_core_queue_data_storage(vcpu, |
991 | kvmppc_get_msr(vcpu) & SRR1_PREFIXED, | |
992 | ea, dsisr); | |
04bae9d5 SJS |
993 | return RESUME_GUEST; |
994 | } | |
8c99d345 | 995 | return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing); |
04bae9d5 SJS |
996 | } |
997 | ||
998 | if (memslot->flags & KVM_MEM_READONLY) { | |
999 | if (writing) { | |
1000 | /* give the guest a DSI */ | |
6cd5c1db NP |
1001 | kvmppc_core_queue_data_storage(vcpu, |
1002 | kvmppc_get_msr(vcpu) & SRR1_PREFIXED, | |
1003 | ea, DSISR_ISSTORE | DSISR_PROTFAULT); | |
04bae9d5 SJS |
1004 | return RESUME_GUEST; |
1005 | } | |
1006 | kvm_ro = true; | |
1007 | } | |
1008 | ||
1009 | /* Failed to set the reference/change bits */ | |
1010 | if (dsisr & DSISR_SET_RC) { | |
1011 | spin_lock(&kvm->mmu_lock); | |
6cdf3037 AK |
1012 | if (kvmppc_hv_handle_set_rc(kvm, false, writing, |
1013 | gpa, kvm->arch.lpid)) | |
04bae9d5 SJS |
1014 | dsisr &= ~DSISR_SET_RC; |
1015 | spin_unlock(&kvm->mmu_lock); | |
1016 | ||
1017 | if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE | | |
1018 | DSISR_PROTFAULT | DSISR_SET_RC))) | |
1019 | return RESUME_GUEST; | |
1020 | } | |
1021 | ||
1022 | /* Try to insert a pte */ | |
1023 | ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot, writing, | |
1024 | kvm_ro, NULL, NULL); | |
1025 | ||
c3856aeb PM |
1026 | if (ret == 0 || ret == -EAGAIN) |
1027 | ret = RESUME_GUEST; | |
5a319350 PM |
1028 | return ret; |
1029 | } | |
1030 | ||
c43c3a86 | 1031 | /* Called with kvm->mmu_lock held */ |
32b48bf8 | 1032 | void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, |
b1c5356e | 1033 | unsigned long gfn) |
01756099 PM |
1034 | { |
1035 | pte_t *ptep; | |
1036 | unsigned long gpa = gfn << PAGE_SHIFT; | |
1037 | unsigned int shift; | |
1038 | ||
008e359c BR |
1039 | if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) { |
1040 | uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT); | |
32b48bf8 | 1041 | return; |
008e359c BR |
1042 | } |
1043 | ||
4b99412e | 1044 | ptep = find_kvm_secondary_pte(kvm, gpa, &shift); |
f0f825f0 | 1045 | if (ptep && pte_present(*ptep)) |
fd10be25 SJS |
1046 | kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, |
1047 | kvm->arch.lpid); | |
01756099 PM |
1048 | } |
1049 | ||
c43c3a86 | 1050 | /* Called with kvm->mmu_lock held */ |
b1c5356e SC |
1051 | bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, |
1052 | unsigned long gfn) | |
01756099 PM |
1053 | { |
1054 | pte_t *ptep; | |
1055 | unsigned long gpa = gfn << PAGE_SHIFT; | |
1056 | unsigned int shift; | |
b1c5356e | 1057 | bool ref = false; |
ae59a7e1 | 1058 | unsigned long old, *rmapp; |
01756099 | 1059 | |
008e359c BR |
1060 | if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) |
1061 | return ref; | |
1062 | ||
4b99412e | 1063 | ptep = find_kvm_secondary_pte(kvm, gpa, &shift); |
01756099 | 1064 | if (ptep && pte_present(*ptep) && pte_young(*ptep)) { |
ae59a7e1 SJS |
1065 | old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, |
1066 | gpa, shift); | |
01756099 | 1067 | /* XXX need to flush tlb here? */ |
ae59a7e1 SJS |
1068 | /* Also clear bit in ptes in shadow pgtable for nested guests */ |
1069 | rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; | |
1070 | kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_ACCESSED, 0, | |
1071 | old & PTE_RPN_MASK, | |
1072 | 1UL << shift); | |
b1c5356e | 1073 | ref = true; |
01756099 PM |
1074 | } |
1075 | return ref; | |
1076 | } | |
1077 | ||
c43c3a86 | 1078 | /* Called with kvm->mmu_lock held */ |
b1c5356e SC |
1079 | bool kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, |
1080 | unsigned long gfn) | |
1081 | ||
01756099 PM |
1082 | { |
1083 | pte_t *ptep; | |
1084 | unsigned long gpa = gfn << PAGE_SHIFT; | |
1085 | unsigned int shift; | |
b1c5356e | 1086 | bool ref = false; |
01756099 | 1087 | |
008e359c BR |
1088 | if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) |
1089 | return ref; | |
1090 | ||
4b99412e | 1091 | ptep = find_kvm_secondary_pte(kvm, gpa, &shift); |
01756099 | 1092 | if (ptep && pte_present(*ptep) && pte_young(*ptep)) |
b1c5356e | 1093 | ref = true; |
01756099 PM |
1094 | return ref; |
1095 | } | |
1096 | ||
8f7b79b8 PM |
1097 | /* Returns the number of PAGE_SIZE pages that are dirty */ |
1098 | static int kvm_radix_test_clear_dirty(struct kvm *kvm, | |
1099 | struct kvm_memory_slot *memslot, int pagenum) | |
1100 | { | |
1101 | unsigned long gfn = memslot->base_gfn + pagenum; | |
1102 | unsigned long gpa = gfn << PAGE_SHIFT; | |
bf8036a4 | 1103 | pte_t *ptep, pte; |
8f7b79b8 PM |
1104 | unsigned int shift; |
1105 | int ret = 0; | |
ae59a7e1 | 1106 | unsigned long old, *rmapp; |
8f7b79b8 | 1107 | |
008e359c BR |
1108 | if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) |
1109 | return ret; | |
1110 | ||
bf8036a4 AK |
1111 | /* |
1112 | * For performance reasons we don't hold kvm->mmu_lock while walking the | |
1113 | * partition scoped table. | |
1114 | */ | |
1115 | ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift); | |
1116 | if (!ptep) | |
1117 | return 0; | |
1118 | ||
1119 | pte = READ_ONCE(*ptep); | |
1120 | if (pte_present(pte) && pte_dirty(pte)) { | |
ae59a7e1 | 1121 | spin_lock(&kvm->mmu_lock); |
bf8036a4 AK |
1122 | /* |
1123 | * Recheck the pte again | |
1124 | */ | |
1125 | if (pte_val(pte) != pte_val(*ptep)) { | |
1126 | /* | |
1127 | * We have KVM_MEM_LOG_DIRTY_PAGES enabled. Hence we can | |
1128 | * only find PAGE_SIZE pte entries here. We can continue | |
1129 | * to use the pte addr returned by above page table | |
1130 | * walk. | |
1131 | */ | |
1132 | if (!pte_present(*ptep) || !pte_dirty(*ptep)) { | |
1133 | spin_unlock(&kvm->mmu_lock); | |
1134 | return 0; | |
1135 | } | |
1136 | } | |
1137 | ||
1138 | ret = 1; | |
1139 | VM_BUG_ON(shift); | |
ae59a7e1 SJS |
1140 | old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, |
1141 | gpa, shift); | |
fd10be25 | 1142 | kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid); |
ae59a7e1 SJS |
1143 | /* Also clear bit in ptes in shadow pgtable for nested guests */ |
1144 | rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; | |
1145 | kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_DIRTY, 0, | |
1146 | old & PTE_RPN_MASK, | |
1147 | 1UL << shift); | |
1148 | spin_unlock(&kvm->mmu_lock); | |
8f7b79b8 PM |
1149 | } |
1150 | return ret; | |
1151 | } | |
1152 | ||
1153 | long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, | |
1154 | struct kvm_memory_slot *memslot, unsigned long *map) | |
1155 | { | |
1156 | unsigned long i, j; | |
8f7b79b8 PM |
1157 | int npages; |
1158 | ||
8f7b79b8 PM |
1159 | for (i = 0; i < memslot->npages; i = j) { |
1160 | npages = kvm_radix_test_clear_dirty(kvm, memslot, i); | |
1161 | ||
1162 | /* | |
1163 | * Note that if npages > 0 then i must be a multiple of npages, | |
1164 | * since huge pages are only used to back the guest at guest | |
1165 | * real addresses that are a multiple of their size. | |
1166 | * Since we have at most one PTE covering any given guest | |
1167 | * real address, if npages > 1 we can skip to i + npages. | |
1168 | */ | |
1169 | j = i + 1; | |
e641a317 PM |
1170 | if (npages) { |
1171 | set_dirty_bits(map, i, npages); | |
117647ff | 1172 | j = i + npages; |
e641a317 | 1173 | } |
8f7b79b8 PM |
1174 | } |
1175 | return 0; | |
1176 | } | |
1177 | ||
5af3e9d0 PM |
1178 | void kvmppc_radix_flush_memslot(struct kvm *kvm, |
1179 | const struct kvm_memory_slot *memslot) | |
1180 | { | |
1181 | unsigned long n; | |
1182 | pte_t *ptep; | |
1183 | unsigned long gpa; | |
1184 | unsigned int shift; | |
1185 | ||
c3262257 | 1186 | if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START) |
ce477a7a | 1187 | kvmppc_uvmem_drop_pages(memslot, kvm, true); |
c3262257 | 1188 | |
008e359c BR |
1189 | if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) |
1190 | return; | |
1191 | ||
5af3e9d0 PM |
1192 | gpa = memslot->base_gfn << PAGE_SHIFT; |
1193 | spin_lock(&kvm->mmu_lock); | |
1194 | for (n = memslot->npages; n; --n) { | |
4b99412e | 1195 | ptep = find_kvm_secondary_pte(kvm, gpa, &shift); |
5af3e9d0 PM |
1196 | if (ptep && pte_present(*ptep)) |
1197 | kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, | |
1198 | kvm->arch.lpid); | |
1199 | gpa += PAGE_SIZE; | |
1200 | } | |
11362b1b PM |
1201 | /* |
1202 | * Increase the mmu notifier sequence number to prevent any page | |
1203 | * fault that read the memslot earlier from writing a PTE. | |
1204 | */ | |
20ec3ebd | 1205 | kvm->mmu_invalidate_seq++; |
5af3e9d0 PM |
1206 | spin_unlock(&kvm->mmu_lock); |
1207 | } | |
1208 | ||
8cf4ecc0 PM |
1209 | static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info, |
1210 | int psize, int *indexp) | |
1211 | { | |
1212 | if (!mmu_psize_defs[psize].shift) | |
1213 | return; | |
1214 | info->ap_encodings[*indexp] = mmu_psize_defs[psize].shift | | |
1215 | (mmu_psize_defs[psize].ap << 29); | |
1216 | ++(*indexp); | |
1217 | } | |
1218 | ||
1219 | int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info) | |
1220 | { | |
1221 | int i; | |
1222 | ||
1223 | if (!radix_enabled()) | |
1224 | return -EINVAL; | |
1225 | memset(info, 0, sizeof(*info)); | |
1226 | ||
1227 | /* 4k page size */ | |
1228 | info->geometries[0].page_shift = 12; | |
1229 | info->geometries[0].level_bits[0] = 9; | |
1230 | for (i = 1; i < 4; ++i) | |
1231 | info->geometries[0].level_bits[i] = p9_supported_radix_bits[i]; | |
1232 | /* 64k page size */ | |
1233 | info->geometries[1].page_shift = 16; | |
1234 | for (i = 0; i < 4; ++i) | |
1235 | info->geometries[1].level_bits[i] = p9_supported_radix_bits[i]; | |
1236 | ||
1237 | i = 0; | |
1238 | add_rmmu_ap_encoding(info, MMU_PAGE_4K, &i); | |
1239 | add_rmmu_ap_encoding(info, MMU_PAGE_64K, &i); | |
1240 | add_rmmu_ap_encoding(info, MMU_PAGE_2M, &i); | |
1241 | add_rmmu_ap_encoding(info, MMU_PAGE_1G, &i); | |
1242 | ||
1243 | return 0; | |
1244 | } | |
1245 | ||
1246 | int kvmppc_init_vm_radix(struct kvm *kvm) | |
1247 | { | |
1248 | kvm->arch.pgtable = pgd_alloc(kvm->mm); | |
1249 | if (!kvm->arch.pgtable) | |
1250 | return -ENOMEM; | |
1251 | return 0; | |
1252 | } | |
1253 | ||
5a319350 PM |
1254 | static void pte_ctor(void *addr) |
1255 | { | |
21828c99 AK |
1256 | memset(addr, 0, RADIX_PTE_TABLE_SIZE); |
1257 | } | |
1258 | ||
1259 | static void pmd_ctor(void *addr) | |
1260 | { | |
1261 | memset(addr, 0, RADIX_PMD_TABLE_SIZE); | |
5a319350 PM |
1262 | } |
1263 | ||
9a94d3ee PM |
1264 | struct debugfs_radix_state { |
1265 | struct kvm *kvm; | |
1266 | struct mutex mutex; | |
1267 | unsigned long gpa; | |
83a05510 | 1268 | int lpid; |
9a94d3ee PM |
1269 | int chars_left; |
1270 | int buf_index; | |
1271 | char buf[128]; | |
1272 | u8 hdr; | |
1273 | }; | |
1274 | ||
1275 | static int debugfs_radix_open(struct inode *inode, struct file *file) | |
1276 | { | |
1277 | struct kvm *kvm = inode->i_private; | |
1278 | struct debugfs_radix_state *p; | |
1279 | ||
1280 | p = kzalloc(sizeof(*p), GFP_KERNEL); | |
1281 | if (!p) | |
1282 | return -ENOMEM; | |
1283 | ||
1284 | kvm_get_kvm(kvm); | |
1285 | p->kvm = kvm; | |
1286 | mutex_init(&p->mutex); | |
1287 | file->private_data = p; | |
1288 | ||
1289 | return nonseekable_open(inode, file); | |
1290 | } | |
1291 | ||
1292 | static int debugfs_radix_release(struct inode *inode, struct file *file) | |
1293 | { | |
1294 | struct debugfs_radix_state *p = file->private_data; | |
1295 | ||
1296 | kvm_put_kvm(p->kvm); | |
1297 | kfree(p); | |
1298 | return 0; | |
1299 | } | |
1300 | ||
1301 | static ssize_t debugfs_radix_read(struct file *file, char __user *buf, | |
1302 | size_t len, loff_t *ppos) | |
1303 | { | |
1304 | struct debugfs_radix_state *p = file->private_data; | |
1305 | ssize_t ret, r; | |
1306 | unsigned long n; | |
1307 | struct kvm *kvm; | |
1308 | unsigned long gpa; | |
1309 | pgd_t *pgt; | |
83a05510 | 1310 | struct kvm_nested_guest *nested; |
2fb47060 MR |
1311 | pgd_t *pgdp; |
1312 | p4d_t p4d, *p4dp; | |
9a94d3ee PM |
1313 | pud_t pud, *pudp; |
1314 | pmd_t pmd, *pmdp; | |
1315 | pte_t *ptep; | |
1316 | int shift; | |
1317 | unsigned long pte; | |
1318 | ||
1319 | kvm = p->kvm; | |
1320 | if (!kvm_is_radix(kvm)) | |
1321 | return 0; | |
1322 | ||
1323 | ret = mutex_lock_interruptible(&p->mutex); | |
1324 | if (ret) | |
1325 | return ret; | |
1326 | ||
1327 | if (p->chars_left) { | |
1328 | n = p->chars_left; | |
1329 | if (n > len) | |
1330 | n = len; | |
1331 | r = copy_to_user(buf, p->buf + p->buf_index, n); | |
1332 | n -= r; | |
1333 | p->chars_left -= n; | |
1334 | p->buf_index += n; | |
1335 | buf += n; | |
1336 | len -= n; | |
1337 | ret = n; | |
1338 | if (r) { | |
1339 | if (!n) | |
1340 | ret = -EFAULT; | |
1341 | goto out; | |
1342 | } | |
1343 | } | |
1344 | ||
1345 | gpa = p->gpa; | |
83a05510 PM |
1346 | nested = NULL; |
1347 | pgt = NULL; | |
1348 | while (len != 0 && p->lpid >= 0) { | |
1349 | if (gpa >= RADIX_PGTABLE_RANGE) { | |
1350 | gpa = 0; | |
1351 | pgt = NULL; | |
1352 | if (nested) { | |
1353 | kvmhv_put_nested(nested); | |
1354 | nested = NULL; | |
1355 | } | |
1356 | p->lpid = kvmhv_nested_next_lpid(kvm, p->lpid); | |
1357 | p->hdr = 0; | |
1358 | if (p->lpid < 0) | |
1359 | break; | |
1360 | } | |
1361 | if (!pgt) { | |
1362 | if (p->lpid == 0) { | |
1363 | pgt = kvm->arch.pgtable; | |
1364 | } else { | |
1365 | nested = kvmhv_get_nested(kvm, p->lpid, false); | |
1366 | if (!nested) { | |
1367 | gpa = RADIX_PGTABLE_RANGE; | |
1368 | continue; | |
1369 | } | |
1370 | pgt = nested->shadow_pgtable; | |
1371 | } | |
1372 | } | |
1373 | n = 0; | |
9a94d3ee | 1374 | if (!p->hdr) { |
83a05510 PM |
1375 | if (p->lpid > 0) |
1376 | n = scnprintf(p->buf, sizeof(p->buf), | |
1377 | "\nNested LPID %d: ", p->lpid); | |
1378 | n += scnprintf(p->buf + n, sizeof(p->buf) - n, | |
9a94d3ee PM |
1379 | "pgdir: %lx\n", (unsigned long)pgt); |
1380 | p->hdr = 1; | |
1381 | goto copy; | |
1382 | } | |
1383 | ||
1384 | pgdp = pgt + pgd_index(gpa); | |
2fb47060 MR |
1385 | p4dp = p4d_offset(pgdp, gpa); |
1386 | p4d = READ_ONCE(*p4dp); | |
1387 | if (!(p4d_val(p4d) & _PAGE_PRESENT)) { | |
1388 | gpa = (gpa & P4D_MASK) + P4D_SIZE; | |
9a94d3ee PM |
1389 | continue; |
1390 | } | |
1391 | ||
2fb47060 | 1392 | pudp = pud_offset(&p4d, gpa); |
9a94d3ee PM |
1393 | pud = READ_ONCE(*pudp); |
1394 | if (!(pud_val(pud) & _PAGE_PRESENT)) { | |
1395 | gpa = (gpa & PUD_MASK) + PUD_SIZE; | |
1396 | continue; | |
1397 | } | |
1398 | if (pud_val(pud) & _PAGE_PTE) { | |
1399 | pte = pud_val(pud); | |
1400 | shift = PUD_SHIFT; | |
1401 | goto leaf; | |
1402 | } | |
1403 | ||
1404 | pmdp = pmd_offset(&pud, gpa); | |
1405 | pmd = READ_ONCE(*pmdp); | |
1406 | if (!(pmd_val(pmd) & _PAGE_PRESENT)) { | |
1407 | gpa = (gpa & PMD_MASK) + PMD_SIZE; | |
1408 | continue; | |
1409 | } | |
1410 | if (pmd_val(pmd) & _PAGE_PTE) { | |
1411 | pte = pmd_val(pmd); | |
1412 | shift = PMD_SHIFT; | |
1413 | goto leaf; | |
1414 | } | |
1415 | ||
1416 | ptep = pte_offset_kernel(&pmd, gpa); | |
1417 | pte = pte_val(READ_ONCE(*ptep)); | |
1418 | if (!(pte & _PAGE_PRESENT)) { | |
1419 | gpa += PAGE_SIZE; | |
1420 | continue; | |
1421 | } | |
1422 | shift = PAGE_SHIFT; | |
1423 | leaf: | |
1424 | n = scnprintf(p->buf, sizeof(p->buf), | |
1425 | " %lx: %lx %d\n", gpa, pte, shift); | |
1426 | gpa += 1ul << shift; | |
1427 | copy: | |
1428 | p->chars_left = n; | |
1429 | if (n > len) | |
1430 | n = len; | |
1431 | r = copy_to_user(buf, p->buf, n); | |
1432 | n -= r; | |
1433 | p->chars_left -= n; | |
1434 | p->buf_index = n; | |
1435 | buf += n; | |
1436 | len -= n; | |
1437 | ret += n; | |
1438 | if (r) { | |
1439 | if (!ret) | |
1440 | ret = -EFAULT; | |
1441 | break; | |
1442 | } | |
1443 | } | |
1444 | p->gpa = gpa; | |
83a05510 PM |
1445 | if (nested) |
1446 | kvmhv_put_nested(nested); | |
9a94d3ee PM |
1447 | |
1448 | out: | |
1449 | mutex_unlock(&p->mutex); | |
1450 | return ret; | |
1451 | } | |
1452 | ||
1453 | static ssize_t debugfs_radix_write(struct file *file, const char __user *buf, | |
1454 | size_t len, loff_t *ppos) | |
1455 | { | |
1456 | return -EACCES; | |
1457 | } | |
1458 | ||
1459 | static const struct file_operations debugfs_radix_fops = { | |
1460 | .owner = THIS_MODULE, | |
1461 | .open = debugfs_radix_open, | |
1462 | .release = debugfs_radix_release, | |
1463 | .read = debugfs_radix_read, | |
1464 | .write = debugfs_radix_write, | |
1465 | .llseek = generic_file_llseek, | |
1466 | }; | |
1467 | ||
1468 | void kvmhv_radix_debugfs_init(struct kvm *kvm) | |
1469 | { | |
faf01aef | 1470 | debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm, |
c4fd527f | 1471 | &debugfs_radix_fops); |
9a94d3ee PM |
1472 | } |
1473 | ||
5a319350 PM |
1474 | int kvmppc_radix_init(void) |
1475 | { | |
21828c99 | 1476 | unsigned long size = sizeof(void *) << RADIX_PTE_INDEX_SIZE; |
5a319350 PM |
1477 | |
1478 | kvm_pte_cache = kmem_cache_create("kvm-pte", size, size, 0, pte_ctor); | |
1479 | if (!kvm_pte_cache) | |
1480 | return -ENOMEM; | |
21828c99 AK |
1481 | |
1482 | size = sizeof(void *) << RADIX_PMD_INDEX_SIZE; | |
1483 | ||
1484 | kvm_pmd_cache = kmem_cache_create("kvm-pmd", size, size, 0, pmd_ctor); | |
1485 | if (!kvm_pmd_cache) { | |
1486 | kmem_cache_destroy(kvm_pte_cache); | |
1487 | return -ENOMEM; | |
1488 | } | |
1489 | ||
5a319350 PM |
1490 | return 0; |
1491 | } | |
1492 | ||
1493 | void kvmppc_radix_exit(void) | |
1494 | { | |
1495 | kmem_cache_destroy(kvm_pte_cache); | |
21828c99 | 1496 | kmem_cache_destroy(kvm_pmd_cache); |
5a319350 | 1497 | } |