2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15 * Copyright IBM Corp. 2007
17 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
20 #ifndef __POWERPC_KVM_HOST_H__
21 #define __POWERPC_KVM_HOST_H__
23 #include <linux/mutex.h>
24 #include <linux/hrtimer.h>
25 #include <linux/interrupt.h>
26 #include <linux/types.h>
27 #include <linux/kvm_types.h>
28 #include <linux/threads.h>
29 #include <linux/spinlock.h>
30 #include <linux/kvm_para.h>
31 #include <linux/list.h>
32 #include <linux/atomic.h>
33 #include <asm/kvm_asm.h>
34 #include <asm/processor.h>
36 #include <asm/cacheflush.h>
37 #include <asm/hvcall.h>
40 #define KVM_MAX_VCPUS NR_CPUS
41 #define KVM_MAX_VCORES NR_CPUS
42 #define KVM_USER_MEM_SLOTS 512
44 #include <asm/cputhreads.h>
46 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
47 #include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
48 #define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
49 #define KVM_MAX_NESTED_GUESTS KVMPPC_NR_LPIDS
52 #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
53 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
55 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
57 #define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */
59 /* These values are internal and can be increased later */
60 #define KVM_NR_IRQCHIPS 1
61 #define KVM_IRQCHIP_NUM_PINS 256
63 /* PPC-specific vcpu->requests bit members */
64 #define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0)
65 #define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1)
67 #include <linux/mmu_notifier.h>
69 #define KVM_ARCH_WANT_MMU_NOTIFIER
71 extern int kvm_unmap_hva_range(struct kvm *kvm,
72 unsigned long start, unsigned long end);
73 extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
74 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
75 extern int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
/* HPTEG_CACHE_NUM is a fixed power of two: 1 << 15. */
#define HPTEG_CACHE_NUM			(1 << 15)

/*
 * Hash sizing constants.  Every HPTEG_HASH_NUM_<x> is derived from the
 * matching HPTEG_HASH_BITS_<x> as (1 << bits), so only the bit widths
 * need to be touched when a table is resized.
 */
#define HPTEG_HASH_BITS_PTE		13
#define HPTEG_HASH_NUM_PTE		(1 << HPTEG_HASH_BITS_PTE)

#define HPTEG_HASH_BITS_PTE_LONG	12
#define HPTEG_HASH_NUM_PTE_LONG		(1 << HPTEG_HASH_BITS_PTE_LONG)

#define HPTEG_HASH_BITS_VPTE		13
#define HPTEG_HASH_NUM_VPTE		(1 << HPTEG_HASH_BITS_VPTE)

#define HPTEG_HASH_BITS_VPTE_LONG	5
#define HPTEG_HASH_NUM_VPTE_LONG	(1 << HPTEG_HASH_BITS_VPTE_LONG)

#define HPTEG_HASH_BITS_VPTE_64K	11
#define HPTEG_HASH_NUM_VPTE_64K		(1 << HPTEG_HASH_BITS_VPTE_64K)
89 /* Physical Address Mask - allowed range of real mode RAM access */
90 #define KVM_PAM 0x0fffffffffffffffULL
96 struct kvmppc_vcpu_book3s;
97 struct kvmppc_book3s_shadow_vcpu;
98 struct kvm_nested_guest;
101 ulong remote_tlb_flush;
104 struct kvm_vcpu_stat {
109 /* Account for special types of light exits: */
110 u64 itlb_real_miss_exits;
111 u64 itlb_virt_miss_exits;
112 u64 dtlb_real_miss_exits;
113 u64 dtlb_virt_miss_exits;
117 u64 emulated_inst_exits;
120 u64 halt_poll_success_ns;
121 u64 halt_poll_fail_ns;
123 u64 halt_successful_poll;
124 u64 halt_attempted_poll;
125 u64 halt_successful_wait;
126 u64 halt_poll_invalid;
132 #ifdef CONFIG_PPC_BOOK3S
146 enum kvm_exit_types {
149 ITLB_REAL_MISS_EXITS,
150 ITLB_VIRT_MISS_EXITS,
151 DTLB_REAL_MISS_EXITS,
152 DTLB_VIRT_MISS_EXITS,
157 EMULATED_MTMSRWE_EXITS,
158 EMULATED_WRTEE_EXITS,
159 EMULATED_MTSPR_EXITS,
160 EMULATED_MFSPR_EXITS,
161 EMULATED_MTMSR_EXITS,
162 EMULATED_MFMSR_EXITS,
163 EMULATED_TLBSX_EXITS,
164 EMULATED_TLBWE_EXITS,
177 __NUMBER_OF_KVM_EXIT_TYPES
180 /* allow access to big endian 32bit upper/lower parts and 64bit var */
181 struct kvmppc_exit_timing {
190 struct kvmppc_pginfo {
195 struct kvmppc_spapr_tce_iommu_table {
197 struct list_head next;
198 struct iommu_table *tbl;
202 struct kvmppc_spapr_tce_table {
203 struct list_head list;
208 u64 offset; /* in pages */
209 u64 size; /* window size in pages */
210 struct list_head iommu_tables;
211 struct page *pages[0];
214 /* XICS components, defined in book3s_xics.c */
217 extern struct kvm_device_ops kvm_xics_ops;
219 /* XIVE components, defined in book3s_xive.c */
221 struct kvmppc_xive_vcpu;
222 extern struct kvm_device_ops kvm_xive_ops;
224 struct kvmppc_passthru_irqmap;
227 * The reverse mapping array has one entry for each HPTE,
228 * which stores the guest's view of the second word of the HPTE
229 * (including the guest physical address of the mapping),
230 * plus forward and backward pointers in a doubly-linked ring
231 * of HPTEs that map the same host page. The pointers in this
232 * ring are 32-bit HPTE indexes, to save space.
234 struct revmap_entry {
235 unsigned long guest_rpte;
236 unsigned int forw, back;
240 * We use the top bit of each memslot->arch.rmap entry as a lock bit,
241 * and bit 32 as a present flag. The bottom 32 bits are the
242 * index in the guest HPT of a HPTE that points to the page.
244 #define KVMPPC_RMAP_LOCK_BIT 63
245 #define KVMPPC_RMAP_RC_SHIFT 32
246 #define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
247 #define KVMPPC_RMAP_PRESENT 0x100000000ul
248 #define KVMPPC_RMAP_INDEX 0xfffffffful
250 struct kvm_arch_memory_slot {
251 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
253 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
256 struct kvm_hpt_info {
257 /* Host virtual (linear mapping) address of guest HPT */
259 /* Array of reverse mapping entries for each guest HPTE */
260 struct revmap_entry *rev;
261 /* Guest HPT size is 2**(order) bytes */
263 /* 1 if HPT allocated with CMA, 0 otherwise */
267 struct kvm_resize_hpt;
271 unsigned int smt_mode; /* # vcpus per virtual core */
272 unsigned int emul_smt_mode; /* emulated SMT mode, on P9 */
273 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
274 unsigned int tlb_sets;
275 struct kvm_hpt_info hpt;
276 atomic64_t mmio_update;
277 unsigned int host_lpid;
278 unsigned long host_lpcr;
280 unsigned long host_sdr1;
282 unsigned long vrma_slb_v;
284 atomic_t vcpus_running;
286 atomic_t hpte_mod_interest;
287 cpumask_t need_tlb_flush;
288 cpumask_t cpu_in_guest;
295 struct dentry *debugfs_dir;
296 struct dentry *htab_dentry;
297 struct dentry *radix_dentry;
298 struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
299 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
300 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
301 struct mutex hpt_mutex;
303 #ifdef CONFIG_PPC_BOOK3S_64
304 struct list_head spapr_tce_tables;
305 struct list_head rtas_tokens;
306 DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
308 #ifdef CONFIG_KVM_MPIC
309 struct openpic *mpic;
311 #ifdef CONFIG_KVM_XICS
312 struct kvmppc_xics *xics;
313 struct kvmppc_xive *xive;
314 struct kvmppc_passthru_irqmap *pimap;
316 struct kvmppc_ops *kvm_ops;
317 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
320 struct kvm_nested_guest *nested_guests[KVM_MAX_NESTED_GUESTS];
321 /* This array can grow quite large, keep it at the end */
322 struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
/*
 * Accessors for the packed entry_exit_map word of a vcore ("vc" is a
 * pointer to an object with an entry_exit_map member):
 *   - the low 8 bits are the entry map,
 *   - all bits from bit 8 upwards are the exit map.
 */
#define VCORE_ENTRY_MAP(vc)	(0xff & (vc)->entry_exit_map)
#define VCORE_EXIT_MAP(vc)	((vc)->entry_exit_map >> 8)
/* True (1) when any bit of the exit map is set, otherwise 0. */
#define VCORE_IS_EXITING(vc)	(((vc)->entry_exit_map >> 8) != 0)
330 /* This bit is used when a vcore exit is triggered from outside the vcore */
331 #define VCORE_EXIT_REQ 0x10000
334 * Values for vcore_state.
335 * Note that these are arranged such that lower values
336 * (< VCORE_SLEEPING) don't require stolen time accounting
337 * on load/unload, and higher values do.
339 #define VCORE_INACTIVE 0
340 #define VCORE_PREEMPT 1
341 #define VCORE_PIGGYBACK 2
342 #define VCORE_SLEEPING 3
343 #define VCORE_RUNNING 4
344 #define VCORE_EXITING 5
345 #define VCORE_POLLING 6
348 * Struct used to manage memory for a virtual processor area
349 * registered by a PAPR guest. There are three types of area
350 * that a guest can register.
353 unsigned long gpa; /* Current guest phys addr */
354 void *pinned_addr; /* Address in kernel linear mapping */
355 void *pinned_end; /* End of region */
356 unsigned long next_gpa; /* Guest phys addr for update */
357 unsigned long len; /* Number of bytes required */
358 u8 update_pending; /* 1 => update pinned_addr from next_gpa */
359 bool dirty; /* true => area has been modified by kernel */
368 bool may_execute : 1;
371 u8 page_size; /* MMU_PAGE_xxx */
377 void (*slbmte)(struct kvm_vcpu *vcpu, u64 rb, u64 rs);
378 u64 (*slbmfee)(struct kvm_vcpu *vcpu, u64 slb_nr);
379 u64 (*slbmfev)(struct kvm_vcpu *vcpu, u64 slb_nr);
380 void (*slbie)(struct kvm_vcpu *vcpu, u64 slb_nr);
381 void (*slbia)(struct kvm_vcpu *vcpu);
383 void (*mtsrin)(struct kvm_vcpu *vcpu, u32 srnum, ulong value);
384 u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
385 int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr,
386 struct kvmppc_pte *pte, bool data, bool iswrite);
387 void (*reset_msr)(struct kvm_vcpu *vcpu);
388 void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
389 int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid);
390 u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data);
391 bool (*is_dcbz32)(struct kvm_vcpu *vcpu);
403 bool large : 1; /* PTEs are 16MB */
404 bool tb : 1; /* 1TB segment */
406 u8 base_page_size; /* MMU_PAGE_xxx */
409 /* Struct used to accumulate timing information in HV real mode code */
410 struct kvmhv_tb_accumulator {
411 u64 seqcount; /* used to synchronize access, also count * 2 */
412 u64 tb_total; /* total time in timebase ticks */
413 u64 tb_min; /* min time */
414 u64 tb_max; /* max time */
417 #ifdef CONFIG_PPC_BOOK3S_64
418 struct kvmppc_irq_map {
421 struct irq_desc *desc;
424 #define KVMPPC_PIRQ_MAPPED 1024
425 struct kvmppc_passthru_irqmap {
427 struct kvmppc_irq_map mapped[KVMPPC_PIRQ_MAPPED];
431 # ifdef CONFIG_PPC_FSL_BOOK3E
432 #define KVMPPC_BOOKE_IAC_NUM 2
433 #define KVMPPC_BOOKE_DAC_NUM 2
435 #define KVMPPC_BOOKE_IAC_NUM 4
436 #define KVMPPC_BOOKE_DAC_NUM 2
438 #define KVMPPC_BOOKE_MAX_IAC 4
439 #define KVMPPC_BOOKE_MAX_DAC 2
441 /* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */
442 #define KVMPPC_EPR_NONE 0 /* EPR not supported */
443 #define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */
444 #define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */
446 #define KVMPPC_IRQ_DEFAULT 0
447 #define KVMPPC_IRQ_MPIC 1
448 #define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */
450 #define MMIO_HPTE_CACHE_SIZE 4
452 struct mmio_hpte_cache_entry {
453 unsigned long hpte_v;
454 unsigned long hpte_r;
456 unsigned long pte_index;
460 unsigned int slb_base_pshift;
463 struct mmio_hpte_cache {
464 struct mmio_hpte_cache_entry entry[MMIO_HPTE_CACHE_SIZE];
468 #define KVMPPC_VSX_COPY_NONE 0
469 #define KVMPPC_VSX_COPY_WORD 1
470 #define KVMPPC_VSX_COPY_DWORD 2
471 #define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP 3
472 #define KVMPPC_VSX_COPY_WORD_LOAD_DUMP 4
474 #define KVMPPC_VMX_COPY_BYTE 8
475 #define KVMPPC_VMX_COPY_HWORD 9
476 #define KVMPPC_VMX_COPY_WORD 10
477 #define KVMPPC_VMX_COPY_DWORD 11
481 /* W0 and W1 of a XIVE thread management context */
496 struct kvm_vcpu_arch {
499 #ifdef CONFIG_PPC_BOOK3S
500 struct kvmppc_slb slb[64];
501 int slb_max; /* 1 + index of last valid entry in slb[] */
502 int slb_nr; /* total number of entries in SLB */
503 struct kvmppc_mmu mmu;
504 struct kvmppc_vcpu_book3s *book3s;
506 #ifdef CONFIG_PPC_BOOK3S_32
507 struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
512 struct thread_fp_state fp;
520 #ifdef CONFIG_ALTIVEC
521 struct thread_vr_state vr;
524 #ifdef CONFIG_KVM_BOOKE_HV
534 #if defined(CONFIG_BOOKE)
535 #if defined(CONFIG_KVM_BOOKE_HV) || defined(CONFIG_64BIT)
540 #ifdef CONFIG_PPC_BOOK3S
541 /* For Gekko paired singles */
545 #ifdef CONFIG_PPC_BOOK3S
549 #ifdef CONFIG_PPC_BOOK3S
551 ulong guest_owned_ext;
583 u32 vrsave; /* also USPRG0 */
585 /* shadow_msr is unused for BookE HV */
598 /* Time base value when we entered the guest */
603 ulong tsr; /* we need to perform set/clr_bits() which requires ulong */
623 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
640 struct thread_fp_state fp_tm;
642 struct thread_vr_state vr_tm;
643 u32 vrsave_tm; /* also USPRG0 */
646 #ifdef CONFIG_KVM_EXIT_TIMING
647 struct mutex exit_timing_lock;
648 struct kvmppc_exit_timing timing_exit;
649 struct kvmppc_exit_timing timing_last_enter;
651 u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
652 u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
653 u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
654 u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
655 u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
656 u64 timing_last_exit;
657 struct dentry *debugfs_exit_timing;
660 #ifdef CONFIG_PPC_BOOK3S
663 unsigned long intr_msr;
664 ulong fault_gpa; /* guest real address of page fault (POWER9) */
673 struct timer_list wdt_timer;
682 /* guest debug registers */
683 struct debug_reg dbg_reg;
685 gpa_t paddr_accessed;
686 gva_t vaddr_accessed;
689 u16 io_gpr; /* GPR used as IO source/target */
690 u8 mmio_host_swabbed;
692 /* conversion between single and double precision */
695 * Number of simulations for vsx.
696 * If we use 2*8bytes to simulate 1*16bytes,
697 * then the number should be 2 and
698 * mmio_copy_type=KVMPPC_VSX_COPY_DWORD.
699 * If we use 4*4bytes to simulate 1*16bytes,
700 * the number should be 4 and
701 * mmio_vsx_copy_type=KVMPPC_VSX_COPY_WORD.
703 u8 mmio_vsx_copy_nums;
705 u8 mmio_vmx_copy_nums;
715 u8 epr_flags; /* KVMPPC_EPR_xxx */
717 u8 external_oneshot; /* clear external irq after delivery */
719 u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
721 struct hrtimer dec_timer;
724 unsigned long pending_exceptions;
728 u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
731 struct swait_queue_head *wqp;
732 struct kvmppc_vcore *vcore;
740 wait_queue_head_t cpu_run;
741 struct machine_check_event mce_evt; /* Valid if trap == 0x200 */
743 struct kvm_vcpu_arch_shared *shared;
744 #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
745 bool shared_big_endian;
747 unsigned long magic_page_pa; /* phys addr to map the magic page to */
748 unsigned long magic_page_ea; /* effect. addr to map the magic page to */
749 bool disable_kernel_nx;
751 int irq_type; /* one of KVM_IRQ_* */
753 struct openpic *mpic; /* KVM_IRQ_MPIC */
754 #ifdef CONFIG_KVM_XICS
755 struct kvmppc_icp *icp; /* XICS presentation controller */
756 struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
757 __be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */
758 u8 xive_pushed; /* Is the VP pushed on the physical CPU ? */
759 u8 xive_esc_on; /* Is the escalation irq enabled ? */
760 union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
761 u64 xive_esc_raddr; /* Escalation interrupt ESB real addr */
762 u64 xive_esc_vaddr; /* Escalation interrupt ESB virt addr */
765 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
766 struct kvm_vcpu_arch_shared shregs;
768 struct mmio_hpte_cache mmio_cache;
769 unsigned long pgfault_addr;
771 unsigned long pgfault_hpte[2];
772 struct mmio_hpte_cache_entry *pgfault_cache;
774 struct task_struct *run_task;
775 struct kvm_run *kvm_run;
777 spinlock_t vpa_update_lock;
778 struct kvmppc_vpa vpa;
779 struct kvmppc_vpa dtl;
780 struct dtl_entry *dtl_ptr;
781 unsigned long dtl_index;
783 struct kvmppc_vpa slb_shadow;
785 spinlock_t tbacct_lock;
793 /* For support of nested guests */
794 struct kvm_nested_guest *nested;
799 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
800 struct kvmhv_tb_accumulator *cur_activity; /* What we're timing */
801 u64 cur_tb_start; /* when it started */
802 struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */
803 struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */
804 struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */
805 struct kvmhv_tb_accumulator guest_time; /* guest execution */
806 struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */
808 struct dentry *debugfs_dir;
809 struct dentry *debugfs_timings;
810 #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
/*
 * Accessors for a vcpu's floating-point and vector register arrays.
 * "vcpu" is a pointer to an object with an "arch" member; each macro
 * expands to a fully parenthesized lvalue, so it can be read, assigned
 * to, or have its address taken.
 *
 *   VCPU_FPR(vcpu, i)        - arch.fp.fpr[i][TS_FPROFFSET]
 *   VCPU_VSX_FPR(vcpu, i, j) - arch.fp.fpr[i][j]
 *   VCPU_VSX_VR(vcpu, i)     - arch.vr.vr[i]
 *
 * VCPU_FPR is wrapped in parentheses like its siblings so the
 * expansion is always a single primary expression.
 */
#define VCPU_FPR(vcpu, i)		((vcpu)->arch.fp.fpr[i][TS_FPROFFSET])
#define VCPU_VSX_FPR(vcpu, i, j)	((vcpu)->arch.fp.fpr[i][j])
#define VCPU_VSX_VR(vcpu, i)		((vcpu)->arch.vr.vr[i])
817 /* Values for vcpu->arch.state */
818 #define KVMPPC_VCPU_NOTREADY 0
819 #define KVMPPC_VCPU_RUNNABLE 1
820 #define KVMPPC_VCPU_BUSY_IN_HOST 2
822 /* Values for vcpu->arch.io_gpr */
823 #define KVM_MMIO_REG_MASK 0x003f
824 #define KVM_MMIO_REG_EXT_MASK 0xffc0
825 #define KVM_MMIO_REG_GPR 0x0000
826 #define KVM_MMIO_REG_FPR 0x0040
827 #define KVM_MMIO_REG_QPR 0x0080
828 #define KVM_MMIO_REG_FQPR 0x00c0
829 #define KVM_MMIO_REG_VSX 0x0100
830 #define KVM_MMIO_REG_VMX 0x0180
831 #define KVM_MMIO_REG_NESTED_GPR 0xffc0
834 #define __KVM_HAVE_ARCH_WQP
835 #define __KVM_HAVE_CREATE_DEVICE
/* Empty stub: takes no arguments and performs no work. */
static inline void kvm_arch_hardware_disable(void)
{
}
/* Empty stub: takes no arguments and performs no work. */
static inline void kvm_arch_hardware_unsetup(void)
{
}
/* Empty stub: @kvm is accepted but never read or modified. */
static inline void kvm_arch_sync_events(struct kvm *kvm)
{
}
/* Empty stub: neither @kvm nor @slots is read or modified. */
static inline void kvm_arch_memslots_updated(struct kvm *kvm,
					     struct kvm_memslots *slots)
{
}
/* Empty stub: @kvm is accepted but never read or modified. */
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
}
/* Empty stub: both @vcpu and @cpu are ignored. */
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
}
/* Empty stub: takes no arguments and performs no work. */
static inline void kvm_arch_exit(void)
{
}
/* Empty stub: @vcpu is accepted but never read or modified. */
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
}
/* Empty stub: @vcpu is accepted but never read or modified. */
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
}
/* Empty stub: @vcpu is accepted but never read or modified. */
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
}
848 #endif /* __POWERPC_KVM_HOST_H__ */