/*
 * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *     Alexander Graf <agraf@suse.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>

#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu-hash32.h>
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>

/* #define DEBUG_MMU */
/* #define DEBUG_SR */

#ifdef DEBUG_MMU
#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__)
#else
#define dprintk_mmu(a, ...) do { } while (0)
#endif

#ifdef DEBUG_SR
#define dprintk_sr(a, ...) printk(KERN_INFO a, __VA_ARGS__)
#else
#define dprintk_sr(a, ...) do { } while (0)
#endif

#if PAGE_SHIFT != 12
#error Unknown page size
#endif

#ifdef CONFIG_SMP
#error XXX need to grab mmu_hash_lock
#endif

#ifdef CONFIG_PTE_64BIT
#error Only 32 bit pages are supported for now
#endif

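/*
 * Tear down one shadow HPTE: clear the valid word in the host hash
 * table, flush the stale translation with tlbie/tlbsync and release
 * the backing host page, marking it dirty if the guest could write.
 */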
static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
	volatile u32 *pteg;

	dprintk_mmu("KVM: Flushing SPTE: 0x%llx (0x%llx) -> 0x%llx\n",
		    pte->pte.eaddr, pte->pte.vpage, pte->host_va);

	pteg = (u32 *)pte->slot;

	pteg[0] = 0;
	asm volatile ("sync");
	asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory");
	asm volatile ("sync");
	asm volatile ("tlbsync");

	pte->host_va = 0;

	if (pte->pte.may_write)
		kvm_release_pfn_dirty(pte->pfn);
	else
		kvm_release_pfn_clean(pte->pfn);
}

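/*
 * Flush all shadow PTEs whose effective address matches guest_ea under
 * ea_mask. A mask of 0 matches everything and resets the cache.
 */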
void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 _guest_ea, u64 _ea_mask)
{
	int i;
	u32 guest_ea = _guest_ea;
	u32 ea_mask = _ea_mask;

	dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%x & 0x%x\n",
		    vcpu->arch.hpte_cache_offset, guest_ea, ea_mask);
	BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);

	guest_ea &= ea_mask;
	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
		struct hpte_cache *pte;

		pte = &vcpu->arch.hpte_cache[i];
		if (!pte->host_va)
			continue;

		if ((pte->pte.eaddr & ea_mask) == guest_ea)
			invalidate_pte(vcpu, pte);
	}

	/* Doing a complete flush -> start from scratch */
	if (!ea_mask)
		vcpu->arch.hpte_cache_offset = 0;
}

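/* Same as above, but match on the virtual page number instead of the EA. */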
void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
{
	int i;

	dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n",
		    vcpu->arch.hpte_cache_offset, guest_vp, vp_mask);
	BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);

	guest_vp &= vp_mask;
	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
		struct hpte_cache *pte;

		pte = &vcpu->arch.hpte_cache[i];
		if (!pte->host_va)
			continue;

		if ((pte->pte.vpage & vp_mask) == guest_vp)
			invalidate_pte(vcpu, pte);
	}
}

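/* Flush all shadow PTEs backed by guest physical addresses in [pa_start, pa_end). */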
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, u64 pa_start, u64 pa_end)
{
	int i;

	dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx - 0x%llx\n",
		    vcpu->arch.hpte_cache_offset, pa_start, pa_end);
	BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);

	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
		struct hpte_cache *pte;

		pte = &vcpu->arch.hpte_cache[i];
		if (!pte->host_va)
			continue;

		if ((pte->pte.raddr >= pa_start) &&
		    (pte->pte.raddr < pa_end))
			invalidate_pte(vcpu, pte);
	}
}

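/*
 * Look up the cached guest PTE that maps the given effective address,
 * or return NULL if no shadow mapping exists for it.
 */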
struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data)
{
	int i;
	u64 guest_vp;

	guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false);
	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
		struct hpte_cache *pte;

		pte = &vcpu->arch.hpte_cache[i];
		if (!pte->host_va)
			continue;

		if (pte->pte.vpage == guest_vp)
			return &pte->pte;
	}

	return NULL;
}

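/*
 * Grab the next free slot in the shadow PTE tracking array. When the
 * array is full, flush all shadow PTEs and start over from slot 0.
 */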
static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM)
		kvmppc_mmu_pte_flush(vcpu, 0, 0);

	return vcpu->arch.hpte_cache_offset++;
}

/* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using
 * a hash, so we don't waste cycles on looping */
static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
{
	return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^
		     ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^
		     ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^
		     ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^
		     ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^
		     ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^
		     ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^
		     ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK));
}

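/*
 * Look up the shadow VSID for a guest VSID. An entry can live at the
 * hashed index or at its mirror (SID_MAP_MASK - hash), matching the
 * alternating placement done in create_sid_map() below.
 */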
static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
{
	struct kvmppc_sid_map *map;
	u16 sid_map_mask;

	if (vcpu->arch.msr & MSR_PR)
		gvsid |= VSID_PR;

	sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
	map = &to_book3s(vcpu)->sid_map[sid_map_mask];
	if (map->guest_vsid == gvsid) {
		dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n",
			   gvsid, map->host_vsid);
		return map;
	}

	map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask];
	if (map->guest_vsid == gvsid) {
		dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n",
			   gvsid, map->host_vsid);
		return map;
	}

	dprintk_sr("SR: Searching 0x%llx -> not found\n", gvsid);
	return NULL;
}

extern struct hash_pte *Hash;
extern unsigned long _SDR1;

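/*
 * Compute the host PTEG address for a (vsid, eaddr) pair, following the
 * classic 32-bit hashed page table scheme: hash the VSID with the page
 * index, complement the hash for the secondary PTEG, mask it with the
 * hash table size taken from SDR1 and add it to the hash table base.
 */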
static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr,
				bool primary)
{
	u32 page, hash, htabmask;
	ulong pteg = (ulong)Hash;

	page = (eaddr & ~ESID_MASK) >> 12;

	hash = ((vsid ^ page) << 6);
	if (!primary)
		hash = ~hash;

	htabmask = ((_SDR1 & 0x1FF) << 16) | 0xFFC0;
	hash &= htabmask;

	pteg |= hash;

	dprintk_mmu("htab: %p | hash: %x | htabmask: %x | pteg: %lx\n",
		    Hash, hash, htabmask, pteg);

	return (u32 *)pteg;
}

extern char etext[];

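/*
 * Map one guest page into the host hash table: translate the guest
 * physical address to a host pfn, find (or create) a shadow VSID for
 * the segment, pick a free slot in the primary or secondary PTEG
 * (overwriting an entry once both are full), install the HPTE with
 * interrupts disabled and remember it in the shadow PTE cache.
 */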
int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
{
	pfn_t hpaddr;
	u64 va;
	u64 vsid;
	struct kvmppc_sid_map *map;
	volatile u32 *pteg;
	u32 eaddr = orig_pte->eaddr;
	u32 pteg0, pteg1;
	register int rr = 0;
	bool primary = false;
	bool evict = false;
	int hpte_id;
	struct hpte_cache *pte;

	/* Get host physical address for gpa */
	hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
	if (is_error_pfn(hpaddr)) {
		printk(KERN_INFO "Couldn't get guest page for gfn %llx!\n",
		       orig_pte->raddr >> PAGE_SHIFT);
		return -EINVAL;
	}
	hpaddr <<= PAGE_SHIFT;

	/* and write the mapping ea -> hpa into the pt */
	vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
	map = find_sid_vsid(vcpu, vsid);
	if (!map) {
		kvmppc_mmu_map_segment(vcpu, eaddr);
		map = find_sid_vsid(vcpu, vsid);
	}
	BUG_ON(!map);

	vsid = map->host_vsid;
	va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK);

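	/*
	 * Search the PTEG for a free slot: rr steps two words per PTE
	 * over the eight PTEs of a group. After 16 words, flip between
	 * primary and secondary hash and start evicting entries.
	 */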
next_pteg:
	if (rr == 16) {
		primary = !primary;
		evict = true;
		rr = 0;
	}

	pteg = kvmppc_mmu_get_pteg(vcpu, vsid, eaddr, primary);

	/* not evicting yet */
	if (!evict && (pteg[rr] & PTE_V)) {
		rr += 2;
		goto next_pteg;
	}

	dprintk_mmu("KVM: old PTEG: %p (%d)\n", pteg, rr);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[0], pteg[1]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[2], pteg[3]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[4], pteg[5]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[6], pteg[7]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[8], pteg[9]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[10], pteg[11]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[12], pteg[13]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[14], pteg[15]);

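	/*
	 * Build the two HPTE words: word 0 holds the valid bit, the
	 * shadow VSID, the secondary-hash flag and the abbreviated page
	 * index; word 1 holds the host real page number plus coherence
	 * (M), referenced/changed and page protection bits.
	 */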
	pteg0 = ((eaddr & 0x0fffffff) >> 22) | (vsid << 7) | PTE_V |
		(primary ? 0 : PTE_SEC);
	pteg1 = hpaddr | PTE_M | PTE_R | PTE_C;

	if (orig_pte->may_write) {
		pteg1 |= PP_RWRW;
		mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
	} else {
		pteg1 |= PP_RWRX;
	}

	local_irq_disable();

	if (pteg[rr]) {
		pteg[rr] = 0;
		asm volatile ("sync");
	}
	pteg[rr + 1] = pteg1;
	pteg[rr] = pteg0;
	asm volatile ("sync");

	local_irq_enable();

	dprintk_mmu("KVM: new PTEG: %p\n", pteg);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[0], pteg[1]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[2], pteg[3]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[4], pteg[5]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[6], pteg[7]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[8], pteg[9]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[10], pteg[11]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[12], pteg[13]);
	dprintk_mmu("KVM: %08x - %08x\n", pteg[14], pteg[15]);

	/* Now tell our Shadow PTE code about the new page */
	hpte_id = kvmppc_mmu_hpte_cache_next(vcpu);
	pte = &vcpu->arch.hpte_cache[hpte_id];

	dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",
		    orig_pte->may_write ? 'w' : '-',
		    orig_pte->may_execute ? 'x' : '-',
		    orig_pte->eaddr, (ulong)pteg, va,
		    orig_pte->vpage, hpaddr);

	pte->slot = (ulong)&pteg[rr];
	pte->host_va = va;
	pte->pte = *orig_pte;
	pte->pfn = hpaddr >> PAGE_SHIFT;

	return 0;
}

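/*
 * Allocate a new guest VSID -> shadow VSID mapping. Colliding entries
 * alternate between the hashed slot and its mirror; when the per-vcpu
 * shadow VSID range runs out, everything is flushed and allocation
 * restarts from vsid_first.
 */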
static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
{
	struct kvmppc_sid_map *map;
	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
	u16 sid_map_mask;
	static int backwards_map = 0;

	if (vcpu->arch.msr & MSR_PR)
		gvsid |= VSID_PR;

	/* We might get collisions that trap in preceding order, so let's
	   map them differently */

	sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
	if (backwards_map)
		sid_map_mask = SID_MAP_MASK - sid_map_mask;

	map = &to_book3s(vcpu)->sid_map[sid_map_mask];

	/* Make sure we're taking the other map next time */
	backwards_map = !backwards_map;

	/* Uh-oh ... out of mappings. Let's flush! */
	if (vcpu_book3s->vsid_next >= vcpu_book3s->vsid_max) {
		vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
		memset(vcpu_book3s->sid_map, 0,
		       sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
		kvmppc_mmu_pte_flush(vcpu, 0, 0);
		kvmppc_mmu_flush_segments(vcpu);
	}
	map->host_vsid = vcpu_book3s->vsid_next;

	/* Would have to be 111 to be completely aligned with the rest of
	   Linux, but that is just way too little space! */
	vcpu_book3s->vsid_next++;

	map->guest_vsid = gvsid;
	map->valid = true;

	return map;
}

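/*
 * Install a shadow segment register for the segment containing eaddr:
 * resolve the guest VSID, find or create its shadow mapping and write
 * the resulting SR value (with the Kp key bit set) into the shadow
 * vcpu. Returns -ENOENT when the guest segment is invalid.
 */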
int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
{
	u32 esid = eaddr >> SID_SHIFT;
	u64 gvsid;
	u32 sr;
	struct kvmppc_sid_map *map;
	struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);

	if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
		/* Invalidate an entry */
		svcpu->sr[esid] = SR_INVALID;
		return -ENOENT;
	}

	map = find_sid_vsid(vcpu, gvsid);
	if (!map)
		map = create_sid_map(vcpu, gvsid);

	map->guest_esid = esid;
	sr = map->host_vsid | SR_KP;
	svcpu->sr[esid] = sr;

	dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr);

	return 0;
}

void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);

	dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr));
	for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++)
		svcpu->sr[i] = SR_INVALID;
}

void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
{
	kvmppc_mmu_pte_flush(vcpu, 0, 0);
	preempt_disable();
	__destroy_context(to_book3s(vcpu)->context_id);
	preempt_enable();
}

/* From mm/mmu_context_hash32.c */
#define CTX_TO_VSID(ctx) (((ctx) * (897 * 16)) & 0xffffff)

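/*
 * Reserve a host MMU context for this vcpu and carve the shadow VSID
 * range out of it, using the same context-to-VSID mapping as the host
 * (see CTX_TO_VSID above).
 */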
int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
	int err;

	err = __init_new_context();
	if (err < 0)
		return -1;
	vcpu3s->context_id = err;

	vcpu3s->vsid_max = CTX_TO_VSID(vcpu3s->context_id + 1) - 1;
	vcpu3s->vsid_first = CTX_TO_VSID(vcpu3s->context_id);

#if 0 /* XXX still doesn't guarantee uniqueness */
	/* We could collide with the Linux vsid space because the vsid
	 * wraps around at 24 bits. We're safe if we do our own space
	 * though, so let's always set the highest bit. */

	vcpu3s->vsid_max |= 0x00800000;
	vcpu3s->vsid_first |= 0x00800000;
#endif
	BUG_ON(vcpu3s->vsid_max < vcpu3s->vsid_first);

	vcpu3s->vsid_next = vcpu3s->vsid_first;

	return 0;
}